1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "double-int.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "hard-reg-set.h"
43 #include "dominance.h"
45 #include "basic-block.h"
46 #include "gimple-pretty-print.h"
47 #include "tree-ssa-alias.h"
48 #include "internal-fn.h"
50 #include "gimple-expr.h"
54 #include "gimple-iterator.h"
55 #include "gimplify-me.h"
56 #include "gimple-ssa.h"
58 #include "tree-phinodes.h"
59 #include "ssa-iterators.h"
60 #include "stringpool.h"
61 #include "tree-ssanames.h"
62 #include "tree-ssa-loop-manip.h"
64 #include "tree-ssa-loop.h"
65 #include "tree-scalar-evolution.h"
69 #include "statistics.h"
71 #include "fixed-value.h"
72 #include "insn-config.h"
81 #include "recog.h" /* FIXME: for insn_data */
82 #include "insn-codes.h"
84 #include "diagnostic-core.h"
85 #include "tree-vectorizer.h"
87 #include "plugin-api.h"
92 /* For lang_hooks.types.type_for_mode. */
93 #include "langhooks.h"
95 /* Return the vectorized type for the given statement. */
/* NOTE(review): this chunk is a line-sampled extraction; the declaration
   line (return type, presumably "tree") and the enclosing braces were
   dropped -- verify against the upstream tree-vect-stmts.c before editing. */
98 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
/* The vector type was computed during analysis and cached in STMT_INFO;
   this is a plain accessor. */
100 return STMT_VINFO_VECTYPE (stmt_info
);
103 /* Return TRUE iff the given statement is in an inner loop relative to
104 the loop being vectorized. */
106 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
108 gimple stmt
= STMT_VINFO_STMT (stmt_info
)
;
109 basic_block bb
= gimple_bb (stmt
);
110 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* NOTE(review): original lines 111-115 (presumably a null check on
   LOOP_VINFO and the declaration of LOOP) were dropped by the
   extraction -- confirm against upstream. */
116 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* STMT is in the inner loop iff the loop immediately containing its
   basic block is the inner loop of the loop being vectorized. */
118 return (bb
->loop_father
== loop
->inner
);
121 /* Record the cost of a statement, either by directly informing the
122 target model or by saving it in a vector for later processing.
123 Return a preliminary estimate of the statement's cost. */
126 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
127 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
128 int misalign
, enum vect_cost_model_location where
)
/* Deferred mode: append the cost info to BODY_COST_VEC for later
   processing and return a preliminary estimate.
   NOTE(review): the conditional guarding this branch (original lines
   129-131, presumably "if (body_cost_vec)") was dropped by the
   extraction. */
132 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
133 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
134 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
/* Preliminary estimate: the target's per-stmt cost scaled by COUNT. */
137 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
/* Immediate mode: hand the cost directly to the target cost model,
   using the loop's or the basic block's target cost data (the
   selecting conditionals were also dropped by the extraction). */
142 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
143 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
144 void *target_cost_data
;
147 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
149 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
151 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
156 /* Return a variable of type ELEM_TYPE[NELEMS]. */
159 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
/* Build the array type ELEM_TYPE[NELEMS] and create a temporary of it.
   NOTE(review): the second argument to create_tmp_var (the variable
   name prefix string) was dropped by the extraction. */
161 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
165 /* ARRAY is an array of vectors created by create_vector_array.
166 Return an SSA_NAME for the vector in index N. The reference
167 is part of the vectorization of STMT and the vector is associated
168 with scalar destination SCALAR_DEST. */
171 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
172 tree array
, unsigned HOST_WIDE_INT n
)
174 tree vect_type
, vect
, vect_name
, array_ref
;
/* ARRAY must really be an array; its element type is the vector type. */
177 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
178 vect_type
= TREE_TYPE (TREE_TYPE (array
));
179 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
/* Build the reference ARRAY[N] (ARRAY_REF with constant index N). */
180 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
181 build_int_cst (size_type_node
, n
),
182 NULL_TREE
, NULL_TREE
);
/* Assign the element to a fresh SSA name of the destination variable
   and emit the load at GSI as part of vectorizing STMT.
   NOTE(review): the declaration of NEW_STMT (original line 183) was
   dropped by the extraction, as was the trailing "return vect_name". */
184 new_stmt
= gimple_build_assign (vect
, array_ref
);
185 vect_name
= make_ssa_name (vect
, new_stmt
);
186 gimple_assign_set_lhs (new_stmt
, vect_name
);
187 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
192 /* ARRAY is an array of vectors created by create_vector_array.
193 Emit code to store SSA_NAME VECT in index N of the array.
194 The store is part of the vectorization of STMT. */
197 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
198 tree array
, unsigned HOST_WIDE_INT n
)
/* Build the reference ARRAY[N] typed like VECT.
   NOTE(review): the local declarations (original lines 199-202) were
   dropped by the extraction. */
203 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
204 build_int_cst (size_type_node
, n
),
205 NULL_TREE
, NULL_TREE
);
/* Emit the store ARRAY[N] = VECT at GSI. */
207 new_stmt
= gimple_build_assign (array_ref
, vect
);
208 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
211 /* PTR is a pointer to an array of type TYPE. Return a representation
212 of *PTR. The memory reference replaces those in FIRST_DR
216 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
218 tree mem_ref
, alias_ptr_type
;
/* Use the alias pointer type of the original data reference so the
   new MEM_REF keeps FIRST_DR's alias information. */
220 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
221 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
222 /* Arrays have the same alignment as their type. */
223 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
227 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
229 /* Function vect_mark_relevant.
231 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
234 vect_mark_relevant (vec
<gimple
> *worklist
, gimple stmt
,
235 enum vect_relevant relevant
, bool live_p
,
236 bool used_in_pattern
)
/* Remember the current relevance/liveness so we can detect below
   whether this call changed anything. */
238 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
239 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
240 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE
, vect_location
,
245 "mark relevant %d, live %d.\n", relevant
, live_p
);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern, in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
254 if (!used_in_pattern
)
256 imm_use_iterator imm_iter
;
/* NOTE(review): several local declarations (use_p, use_stmt, lhs;
   original lines 257-259) were dropped by the extraction. */
260 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
261 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* Fetch the LHS so we can walk its immediate uses below. */
263 if (is_gimple_assign (stmt
))
264 lhs
= gimple_assign_lhs (stmt
);
266 lhs
= gimple_call_lhs (stmt
);
268 /* This use is out of pattern use, if LHS has other uses that are
269 pattern uses, we should mark the stmt itself, and not the pattern
271 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
272 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
/* Ignore debug uses; they do not affect relevance. */
274 if (is_gimple_debug (USE_STMT (use_p
)))
276 use_stmt
= USE_STMT (use_p
);
/* Uses outside the loop being vectorized are irrelevant here. */
278 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
281 if (vinfo_for_stmt (use_stmt
)
282 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
292 /* This is the last stmt in a sequence that was detected as a
293 pattern that can potentially be vectorized. Don't mark the stmt
294 as relevant/live because it's not going to be vectorized.
295 Instead mark the pattern-stmt that replaces it. */
297 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
299 if (dump_enabled_p ())
300 dump_printf_loc (MSG_NOTE
, vect_location
,
301 "last stmt in pattern. don't mark"
302 " relevant/live.\n");
/* Redirect all further processing to the pattern stmt's info. */
303 stmt_info
= vinfo_for_stmt (pattern_stmt
);
304 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
305 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
306 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
/* Merge in the new liveness, and only ever increase relevance. */
311 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
312 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
313 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
/* If nothing changed, the stmt was already on the worklist once;
   do not push it again. */
315 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
316 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE
, vect_location
,
320 "already marked relevant/live.\n")
;
324 worklist
->safe_push (stmt
);
328 /* Function vect_stmt_relevant_p.
330 Return true if STMT in loop that is represented by LOOP_VINFO is
331 "relevant for vectorization".
333 A stmt is considered "relevant for vectorization" if:
334 - it has uses outside the loop.
335 - it has vdefs (it alters memory).
336 - control stmts in the loop (except for the exit condition).
338 CHECKME: what other side effects would the vectorizer allow? */
341 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
342 enum vect_relevant
*relevant
, bool *live_p
)
344 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
346 imm_use_iterator imm_iter
;
/* Start pessimistic: unused in scope, not live.
   NOTE(review): declarations of use_p/def_p/op_iter and the
   "*live_p = false" initialization (original lines 347-351) were
   dropped by the extraction. */
350 *relevant
= vect_unused_in_scope
;
353 /* cond stmt other than loop exit cond. */
354 if (is_ctrl_stmt (stmt
)
355 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
356 != loop_exit_ctrl_vec_info_type
)
357 *relevant
= vect_used_in_scope
;
359 /* changing memory. */
360 if (gimple_code (stmt
) != GIMPLE_PHI
)
361 if (gimple_vdef (stmt
)
362 && !gimple_clobber_p (stmt
))
364 if (dump_enabled_p ())
365 dump_printf_loc (MSG_NOTE
, vect_location
,
366 "vec_stmt_relevant_p: stmt has vdefs.\n");
367 *relevant
= vect_used_in_scope
;
370 /* uses outside the loop. */
371 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
373 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
375 basic_block bb
= gimple_bb (USE_STMT (use_p
));
376 if (!flow_bb_inside_loop_p (loop
, bb
))
378 if (dump_enabled_p ())
379 dump_printf_loc (MSG_NOTE
, vect_location
,
380 "vec_stmt_relevant_p: used out of loop.\n");
/* Debug uses outside the loop do not make the stmt live. */
382 if (is_gimple_debug (USE_STMT (use_p
)))
385 /* We expect all such uses to be in the loop exit phis
386 (because of loop closed form) */
387 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
388 gcc_assert (bb
== single_exit (loop
)->dest
);
/* NOTE(review): the "*live_p = true" assignment (original ~line 390)
   was dropped by the extraction. */
395 return (*live_p
|| *relevant
);
399 /* Function exist_non_indexing_operands_for_use_p
401 USE is one of the uses attached to STMT. Check if USE is
402 used in STMT for anything other than indexing an array. */
405 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
408 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
)
;
410 /* USE corresponds to some operand in STMT. If there is no data
411 reference in STMT, then any operand that corresponds to USE
412 is not indexing an array. */
413 if (!STMT_VINFO_DATA_REF (stmt_info
))
416 /* STMT has a data_ref. FORNOW this means that its of one of
420 (This should have been verified in analyze_data_refs).
422 'var' in the second case corresponds to a def, not a use,
423 so USE cannot correspond to any operands that are not used
426 Therefore, all we need to check is if STMT falls into the
427 first case, and whether var corresponds to USE. */
/* Non-copy assignments cannot be the "var = array_ref" form. */
429 if (!gimple_assign_copy_p (stmt
))
/* Internal masked load/store calls carry their non-indexing operand
   at a known argument position (3 for MASK_STORE, 2 for MASK_LOAD --
   presumably; the case labels were dropped by the extraction). */
431 if (is_gimple_call (stmt
)
432 && gimple_call_internal_p (stmt
))
433 switch (gimple_call_internal_fn (stmt
))
436 operand
= gimple_call_arg (stmt
, 3);
441 operand
= gimple_call_arg (stmt
, 2);
/* For a copy with SSA_NAME lhs, USE is non-indexing iff it is the
   RHS itself.  NOTE(review): the final comparison/return lines
   (original ~455-460) were dropped by the extraction. */
451 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
453 operand
= gimple_assign_rhs1 (stmt
);
454 if (TREE_CODE (operand
) != SSA_NAME
)
465 Function process_use.
468 - a USE in STMT in a loop represented by LOOP_VINFO
469 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
470 that defined USE. This is done by calling mark_relevant and passing it
471 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
472 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
476 Generally, LIVE_P and RELEVANT are used to define the liveness and
477 relevance info of the DEF_STMT of this USE:
478 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
479 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
481 - case 1: If USE is used only for address computations (e.g. array indexing),
482 which does not need to be directly vectorized, then the liveness/relevance
483 of the respective DEF_STMT is left unchanged.
484 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
485 skip DEF_STMT cause it had already been processed.
486 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
487 be modified accordingly.
489 Return true if everything is as expected. Return false otherwise. */
492 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
493 enum vect_relevant relevant
, vec
<gimple
> *worklist
,
496 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
497 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
498 stmt_vec_info dstmt_vinfo
;
499 basic_block bb
, def_bb
;
/* NOTE(review): declarations of def_stmt and def (original ~500-501)
   were dropped by the extraction. */
502 enum vect_def_type dt
;
504 /* case 1: we are only interested in uses that need to be vectorized. Uses
505 that are used for address computation are not considered relevant. */
506 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
/* Resolve USE's defining statement and def-type; bail out (false) on
   unsupported uses. */
509 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
513 "not vectorized: unsupported use in stmt.\n");
/* A nop/missing def (e.g. a default definition) needs no marking. */
517 if (!def_stmt
|| gimple_nop_p (def_stmt
))
520 def_bb
= gimple_bb (def_stmt
);
521 if (!flow_bb_inside_loop_p (loop
, def_bb
))
523 if (dump_enabled_p ())
524 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
528 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
529 DEF_STMT must have already been processed, because this should be the
530 only way that STMT, which is a reduction-phi, was put in the worklist,
531 as there should be no other uses for DEF_STMT in the loop. So we just
532 check that everything is as expected, and we are done. */
533 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
534 bb
= gimple_bb (stmt
);
535 if (gimple_code (stmt
) == GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
537 && gimple_code (def_stmt
) != GIMPLE_PHI
538 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
539 && bb
->loop_father
== def_bb
->loop_father
)
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE
, vect_location
,
543 "reduc-stmt defining reduc-phi in the same nest.\n");
/* If DEF_STMT was replaced by a pattern, validate the pattern stmt. */
544 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
545 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
546 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
547 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
548 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
552 /* case 3a: outer-loop stmt defining an inner-loop stmt:
553 outer-loop-header-bb:
/* Cross-nest case: DEF is in the outer loop while STMT is in the inner
   loop; adjust RELEVANT accordingly (switch head dropped by extraction). */
559 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
561 if (dump_enabled_p ())
562 dump_printf_loc (MSG_NOTE
, vect_location
,
563 "outer-loop def-stmt defining inner-loop stmt.\n");
567 case vect_unused_in_scope
:
568 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
569 vect_used_in_scope
: vect_unused_in_scope
;
572 case vect_used_in_outer_by_reduction
:
573 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
574 relevant
= vect_used_by_reduction
;
577 case vect_used_in_outer
:
578 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
579 relevant
= vect_used_in_scope
;
582 case vect_used_in_scope
:
590 /* case 3b: inner-loop stmt defining an outer-loop stmt:
591 outer-loop-header-bb:
595 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
597 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_NOTE
, vect_location
,
601 "inner-loop def-stmt defining outer-loop stmt.\n");
605 case vect_unused_in_scope
:
606 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
607 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
608 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
611 case vect_used_by_reduction
:
612 relevant
= vect_used_in_outer_by_reduction
;
615 case vect_used_in_scope
:
616 relevant
= vect_used_in_outer
;
/* Finally, propagate the (possibly adjusted) relevance/liveness to
   DEF_STMT and queue it on the worklist. */
624 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
625 is_pattern_stmt_p (stmt_vinfo
));
630 /* Function vect_mark_stmts_to_be_vectorized.
632 Not all stmts in the loop need to be vectorized. For example:
641 Stmt 1 and 3 do not need to be vectorized, because loop control and
642 addressing of vectorized data-refs are handled differently.
644 This pass detects such stmts. */
647 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
649 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
650 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
651 unsigned int nbbs
= loop
->num_nodes
;
652 gimple_stmt_iterator si
;
/* NOTE(review): declarations of stmt, i, phi, live_p, use_p, iter etc.
   (original lines 653-658) were dropped by the extraction. */
655 stmt_vec_info stmt_vinfo
;
659 enum vect_relevant relevant
, tmp_relevant
;
660 enum vect_def_type def_type
;
662 if (dump_enabled_p ())
663 dump_printf_loc (MSG_NOTE
, vect_location
,
664 "=== vect_mark_stmts_to_be_vectorized ===\n");
666 auto_vec
<gimple
, 64> worklist
;
668 /* 1. Init worklist. */
/* Seed the worklist with every intrinsically relevant phi and stmt
   (stores, out-of-loop uses, control stmts -- see vect_stmt_relevant_p). */
669 for (i
= 0; i
< nbbs
; i
++)
672 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
675 if (dump_enabled_p ())
677 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
678 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
681 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
682 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
684 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
686 stmt
= gsi_stmt (si
);
687 if (dump_enabled_p ())
689 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
690 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
693 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
694 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
698 /* 2. Process_worklist */
/* Fixed-point propagation: pop a stmt, then mark the def-stmts of all
   its uses via process_use, which may push new stmts. */
699 while (worklist
.length () > 0)
704 stmt
= worklist
.pop ();
705 if (dump_enabled_p ())
707 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
708 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
711 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
712 (DEF_STMT) as relevant/irrelevant and live/dead according to the
713 liveness and relevance properties of STMT. */
714 stmt_vinfo
= vinfo_for_stmt (stmt
);
715 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
716 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
718 /* Generally, the liveness and relevance properties of STMT are
719 propagated as is to the DEF_STMTs of its USEs:
720 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
721 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
723 One exception is when STMT has been identified as defining a reduction
724 variable; in this case we set the liveness/relevance as follows:
726 relevant = vect_used_by_reduction
727 This is because we distinguish between two kinds of relevant stmts -
728 those that are used by a reduction computation, and those that are
729 (also) used by a regular computation. This allows us later on to
730 identify stmts that are used solely by a reduction, and therefore the
731 order of the results that they produce does not have to be kept. */
733 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
734 tmp_relevant
= relevant
;
/* Reduction/nested-cycle/double-reduction defs restrict which
   relevances are acceptable; anything else is "unsupported use"
   (the enclosing switch head was dropped by the extraction). */
737 case vect_reduction_def
:
738 switch (tmp_relevant
)
740 case vect_unused_in_scope
:
741 relevant
= vect_used_by_reduction
;
744 case vect_used_by_reduction
:
745 if (gimple_code (stmt
) == GIMPLE_PHI
)
750 if (dump_enabled_p ())
751 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
752 "unsupported use of reduction.\n");
759 case vect_nested_cycle
:
760 if (tmp_relevant
!= vect_unused_in_scope
761 && tmp_relevant
!= vect_used_in_outer_by_reduction
762 && tmp_relevant
!= vect_used_in_outer
)
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
766 "unsupported use of nested cycle.\n");
774 case vect_double_reduction_def
:
775 if (tmp_relevant
!= vect_unused_in_scope
776 && tmp_relevant
!= vect_used_by_reduction
)
778 if (dump_enabled_p ())
779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
780 "unsupported use of double reduction.\n");
792 if (is_pattern_stmt_p (stmt_vinfo
))
794 /* Pattern statements are not inserted into the code, so
795 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
796 have to scan the RHS or function arguments instead. */
797 if (is_gimple_assign (stmt
))
799 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
800 tree op
= gimple_assign_rhs1 (stmt
);
/* COND_EXPR with a comparison RHS1: process both comparison operands
   individually. */
803 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
805 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
806 live_p
, relevant
, &worklist
, false)
807 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
808 live_p
, relevant
, &worklist
, false))
/* Process the remaining SSA operands of the pattern assign. */
812 for (; i
< gimple_num_ops (stmt
); i
++)
814 op
= gimple_op (stmt
, i
);
815 if (TREE_CODE (op
) == SSA_NAME
816 && !process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
821 else if (is_gimple_call (stmt
))
823 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
825 tree arg
= gimple_call_arg (stmt
, i
);
826 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
/* Non-pattern stmts: walk the regular SSA use operands. */
833 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
835 tree op
= USE_FROM_PTR (use_p
);
836 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
/* Gather loads also use an offset SSA name that is not a regular
   operand; force-process it. */
841 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
844 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
846 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
850 } /* while worklist */
856 /* Function vect_model_simple_cost.
858 Models cost for simple operations, i.e. those that only emit ncopies of a
859 single op. Right now, this does not account for multiple insns that could
860 be generated for the single vector op. We will handle that shortly. */
863 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
864 enum vect_def_type
*dt
,
865 stmt_vector_for_cost
*prologue_cost_vec
,
866 stmt_vector_for_cost
*body_cost_vec
)
869 int inside_cost
= 0, prologue_cost
= 0;
871 /* The SLP costs were already calculated during SLP tree build. */
872 if (PURE_SLP_STMT (stmt_info
))
/* Constant/external operands need a one-time vector setup in the
   prologue. */
875 /* FORNOW: Assuming maximum 2 args per stmts. */
876 for (i
= 0; i
< 2; i
++)
877 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
878 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
879 stmt_info
, 0, vect_prologue
);
881 /* Pass the inside-of-loop statements to the target-specific cost model. */
882 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
883 stmt_info
, 0, vect_body
);
885 if (dump_enabled_p ())
886 dump_printf_loc (MSG_NOTE
, vect_location
,
887 "vect_model_simple_cost: inside_cost = %d, "
888 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
892 /* Model cost for type demotion and promotion operations. PWR is normally
893 zero for single-step promotions and demotions. It will be one if
894 two-step promotion/demotion is required, and so on. Each additional
895 step doubles the number of instructions required. */
898 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
899 enum vect_def_type
*dt
, int pwr
)
902 int inside_cost
= 0, prologue_cost
= 0;
903 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
904 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
905 void *target_cost_data
;
907 /* The SLP costs were already calculated during SLP tree build. */
908 if (PURE_SLP_STMT (stmt_info
))
/* Pick the loop's or the basic block's target cost data (the selecting
   conditional was dropped by the extraction). */
912 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
914 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
/* One cost contribution per promotion/demotion step; each step
   doubles the instruction count (vect_pow2). */
916 for (i
= 0; i
< pwr
+ 1; i
++)
918 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
920 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
921 vec_promote_demote
, stmt_info
, 0,
925 /* FORNOW: Assuming maximum 2 args per stmts. */
926 for (i
= 0; i
< 2; i
++)
927 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
928 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
929 stmt_info
, 0, vect_prologue
);
931 if (dump_enabled_p ())
932 dump_printf_loc (MSG_NOTE
, vect_location
,
933 "vect_model_promotion_demotion_cost: inside_cost = %d, "
934 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
937 /* Function vect_cost_group_size
939 For grouped load or store, return the group_size only if it is the first
940 load or store of a group, else return 1. This ensures that group size is
941 only returned once per group. */
944 vect_cost_group_size (stmt_vec_info stmt_info
)
946 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
/* Only the group leader reports the full group size.
   NOTE(review): the fall-through "return 1" for non-leaders (original
   ~line 951) was dropped by the extraction. */
948 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
949 return GROUP_SIZE (stmt_info
);
955 /* Function vect_model_store_cost
957 Models cost for stores. In the case of grouped accesses, one access
958 has the overhead of the grouped access attributed to it. */
961 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
962 bool store_lanes_p
, enum vect_def_type dt
,
964 stmt_vector_for_cost
*prologue_cost_vec
,
965 stmt_vector_for_cost
*body_cost_vec
)
968 unsigned int inside_cost
= 0, prologue_cost
= 0;
969 struct data_reference
*first_dr
;
/* A constant/external stored value needs a scalar-to-vector broadcast
   in the prologue. */
972 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
973 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
974 stmt_info
, 0, vect_prologue
);
976 /* Grouped access? */
977 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
/* SLP path: the group leader is the first scalar stmt of the SLP node
   (the selecting conditional was dropped by the extraction). */
981 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
986 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
987 group_size
= vect_cost_group_size (stmt_info
);
990 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
992 /* Not a grouped access. */
996 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
999 /* We assume that the cost of a single store-lanes instruction is
1000 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
1001 access is instead being provided by a permute-and-store operation,
1002 include the cost of the permutes. */
1003 if (!store_lanes_p
&& group_size
> 1
1004 && !STMT_VINFO_STRIDED_P (stmt_info
))
1006 /* Uses a high and low interleave or shuffle operations for each
1008 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1009 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1010 stmt_info
, 0, vect_body
);
1012 if (dump_enabled_p ())
1013 dump_printf_loc (MSG_NOTE
, vect_location
,
1014 "vect_model_store_cost: strided group_size = %d .\n",
1018 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1019 /* Costs of the stores. */
1020 if (STMT_VINFO_STRIDED_P (stmt_info
)
1021 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1023 /* N scalar stores plus extracting the elements. */
1024 inside_cost
+= record_stmt_cost (body_cost_vec
,
1025 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1026 scalar_store
, stmt_info
, 0, vect_body
);
1029 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
/* Strided stores also pay to extract each element from the vector. */
1031 if (STMT_VINFO_STRIDED_P (stmt_info
))
1032 inside_cost
+= record_stmt_cost (body_cost_vec
,
1033 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1034 vec_to_scalar
, stmt_info
, 0, vect_body
);
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE
, vect_location
,
1038 "vect_model_store_cost: inside_cost = %d, "
1039 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1043 /* Calculate cost of DR's memory access. */
1045 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
1046 unsigned int *inside_cost
,
1047 stmt_vector_for_cost
*body_cost_vec
)
1049 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1050 gimple stmt
= DR_STMT (dr
);
1051 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
)
;
/* Dispatch on how the target supports this store's alignment
   (the dr_aligned case label was dropped by the extraction). */
1053 switch (alignment_support_scheme
)
1057 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1058 vector_store
, stmt_info
, 0,
1061 if (dump_enabled_p ())
1062 dump_printf_loc (MSG_NOTE
, vect_location
,
1063 "vect_model_store_cost: aligned.\n");
1067 case dr_unaligned_supported
:
1069 /* Here, we assign an additional cost for the unaligned store. */
1070 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1071 unaligned_store
, stmt_info
,
1072 DR_MISALIGNMENT (dr
), vect_body
);
1073 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_NOTE
, vect_location
,
1075 "vect_model_store_cost: unaligned supported by "
1080 case dr_unaligned_unsupported
:
/* Effectively veto vectorization by charging the maximum cost. */
1082 *inside_cost
= VECT_MAX_COST
;
1084 if (dump_enabled_p ())
1085 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1086 "vect_model_store_cost: unsupported access.\n");
1096 /* Function vect_model_load_cost
1098 Models cost for loads. In the case of grouped accesses, the last access
1099 has the overhead of the grouped access attributed to it. Since unaligned
1100 accesses are supported for loads, we also account for the costs of the
1101 access scheme chosen. */
1104 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1105 bool load_lanes_p
, slp_tree slp_node
,
1106 stmt_vector_for_cost
*prologue_cost_vec
,
1107 stmt_vector_for_cost
*body_cost_vec
)
1111 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1112 unsigned int inside_cost
= 0, prologue_cost
= 0;
1114 /* Grouped accesses? */
1115 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1116 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1118 group_size
= vect_cost_group_size (stmt_info
);
1119 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1121 /* Not a grouped access. */
/* NOTE(review): the else-branch assignments (group_size = 1,
   first_dr = dr; original ~1122-1126) were dropped by the extraction. */
1128 /* We assume that the cost of a single load-lanes instruction is
1129 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1130 access is instead being provided by a load-and-permute operation,
1131 include the cost of the permutes. */
1132 if (!load_lanes_p
&& group_size
> 1
1133 && !STMT_VINFO_STRIDED_P (stmt_info
))
1135 /* Uses an even and odd extract operations or shuffle operations
1136 for each needed permute. */
1137 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1138 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1139 stmt_info
, 0, vect_body
);
1141 if (dump_enabled_p ())
1142 dump_printf_loc (MSG_NOTE
, vect_location
,
1143 "vect_model_load_cost: strided group_size = %d .\n",
1147 /* The loads themselves. */
1148 if (STMT_VINFO_STRIDED_P (stmt_info
)
1149 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1151 /* N scalar loads plus gathering them into a vector. */
1152 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1153 inside_cost
+= record_stmt_cost (body_cost_vec
,
1154 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1155 scalar_load
, stmt_info
, 0, vect_body
);
/* Regular (possibly unaligned) vector loads: defer to the per-DR
   alignment-scheme costing.  The third argument tells it whether a
   realignment cost should be added (single-element group, or SLP). */
1158 vect_get_load_cost (first_dr
, ncopies
,
1159 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1160 || group_size
> 1 || slp_node
),
1161 &inside_cost
, &prologue_cost
,
1162 prologue_cost_vec
, body_cost_vec
, true);
/* Strided loads pay an extra vector-construction cost. */
1163 if (STMT_VINFO_STRIDED_P (stmt_info
))
1164 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1165 stmt_info
, 0, vect_body
);
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE
, vect_location
,
1169 "vect_model_load_cost: inside_cost = %d, "
1170 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1174 /* Calculate cost of DR's memory access. */
1176 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1177 bool add_realign_cost
, unsigned int *inside_cost
,
1178 unsigned int *prologue_cost
,
1179 stmt_vector_for_cost
*prologue_cost_vec
,
1180 stmt_vector_for_cost
*body_cost_vec
,
1181 bool record_prologue_costs
)
1183 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1184 gimple stmt
= DR_STMT (dr
);
1185 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1187 switch (alignment_support_scheme
)
1191 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1192 stmt_info
, 0, vect_body
);
1194 if (dump_enabled_p ())
1195 dump_printf_loc (MSG_NOTE
, vect_location
,
1196 "vect_model_load_cost: aligned.\n");
1200 case dr_unaligned_supported
:
1202 /* Here, we assign an additional cost for the unaligned load. */
1203 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1204 unaligned_load
, stmt_info
,
1205 DR_MISALIGNMENT (dr
), vect_body
);
1207 if (dump_enabled_p ())
1208 dump_printf_loc (MSG_NOTE
, vect_location
,
1209 "vect_model_load_cost: unaligned supported by "
1214 case dr_explicit_realign
:
1216 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1217 vector_load
, stmt_info
, 0, vect_body
);
1218 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1219 vec_perm
, stmt_info
, 0, vect_body
);
1221 /* FIXME: If the misalignment remains fixed across the iterations of
1222 the containing loop, the following cost should be added to the
1224 if (targetm
.vectorize
.builtin_mask_for_load
)
1225 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1226 stmt_info
, 0, vect_body
);
1228 if (dump_enabled_p ())
1229 dump_printf_loc (MSG_NOTE
, vect_location
,
1230 "vect_model_load_cost: explicit realign\n");
1234 case dr_explicit_realign_optimized
:
1236 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE
, vect_location
,
1238 "vect_model_load_cost: unaligned software "
1241 /* Unaligned software pipeline has a load of an address, an initial
1242 load, and possibly a mask operation to "prime" the loop. However,
1243 if this is an access in a group of loads, which provide grouped
1244 access, then the above cost should only be considered for one
1245 access in the group. Inside the loop, there is a load op
1246 and a realignment op. */
1248 if (add_realign_cost
&& record_prologue_costs
)
1250 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1251 vector_stmt
, stmt_info
,
1253 if (targetm
.vectorize
.builtin_mask_for_load
)
1254 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1255 vector_stmt
, stmt_info
,
1259 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1260 stmt_info
, 0, vect_body
);
1261 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1262 stmt_info
, 0, vect_body
);
1264 if (dump_enabled_p ())
1265 dump_printf_loc (MSG_NOTE
, vect_location
,
1266 "vect_model_load_cost: explicit realign optimized"
1272 case dr_unaligned_unsupported
:
1274 *inside_cost
= VECT_MAX_COST
;
1276 if (dump_enabled_p ())
1277 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1278 "vect_model_load_cost: unsupported access.\n");
1287 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1288 the loop preheader for the vectorized stmt STMT. */
1291 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
1294 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1297 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1298 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1302 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1306 if (nested_in_vect_loop_p (loop
, stmt
))
1309 pe
= loop_preheader_edge (loop
);
1310 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1311 gcc_assert (!new_bb
);
1315 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1317 gimple_stmt_iterator gsi_bb_start
;
1319 gcc_assert (bb_vinfo
);
1320 bb
= BB_VINFO_BB (bb_vinfo
);
1321 gsi_bb_start
= gsi_after_labels (bb
);
1322 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1326 if (dump_enabled_p ())
1328 dump_printf_loc (MSG_NOTE
, vect_location
,
1329 "created new init_stmt: ");
1330 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1334 /* Function vect_init_vector.
1336 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1337 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1338 vector type a vector with all elements equal to VAL is created first.
1339 Place the initialization at BSI if it is not NULL. Otherwise, place the
1340 initialization at the loop preheader.
1341 Return the DEF of INIT_STMT.
1342 It will be used in the vectorization of STMT. */
1345 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1352 if (TREE_CODE (type
) == VECTOR_TYPE
1353 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1355 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1357 if (CONSTANT_CLASS_P (val
))
1358 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
1361 new_temp
= make_ssa_name (TREE_TYPE (type
));
1362 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1363 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1367 val
= build_vector_from_val (type
, val
);
1370 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1371 init_stmt
= gimple_build_assign (new_var
, val
);
1372 new_temp
= make_ssa_name (new_var
, init_stmt
);
1373 gimple_assign_set_lhs (init_stmt
, new_temp
);
1374 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1375 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1380 /* Function vect_get_vec_def_for_operand.
1382 OP is an operand in STMT. This function returns a (vector) def that will be
1383 used in the vectorized stmt for STMT.
1385 In the case that OP is an SSA_NAME which is defined in the loop, then
1386 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1388 In case OP is an invariant or constant, a new stmt that creates a vector def
1389 needs to be introduced. */
1392 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1397 stmt_vec_info def_stmt_info
= NULL
;
1398 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1399 unsigned int nunits
;
1400 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1402 enum vect_def_type dt
;
1406 if (dump_enabled_p ())
1408 dump_printf_loc (MSG_NOTE
, vect_location
,
1409 "vect_get_vec_def_for_operand: ");
1410 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1411 dump_printf (MSG_NOTE
, "\n");
1414 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1415 &def_stmt
, &def
, &dt
);
1416 gcc_assert (is_simple_use
);
1417 if (dump_enabled_p ())
1419 int loc_printed
= 0;
1422 dump_printf_loc (MSG_NOTE
, vect_location
, "def = ");
1424 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, def
);
1425 dump_printf (MSG_NOTE
, "\n");
1430 dump_printf (MSG_NOTE
, " def_stmt = ");
1432 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1433 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1439 /* Case 1: operand is a constant. */
1440 case vect_constant_def
:
1442 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1443 gcc_assert (vector_type
);
1444 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1449 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1450 if (dump_enabled_p ())
1451 dump_printf_loc (MSG_NOTE
, vect_location
,
1452 "Create vector_cst. nunits = %d\n", nunits
);
1454 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1457 /* Case 2: operand is defined outside the loop - loop invariant. */
1458 case vect_external_def
:
1460 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1461 gcc_assert (vector_type
);
1466 /* Create 'vec_inv = {inv,inv,..,inv}' */
1467 if (dump_enabled_p ())
1468 dump_printf_loc (MSG_NOTE
, vect_location
, "Create vector_inv.\n");
1470 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1473 /* Case 3: operand is defined inside the loop. */
1474 case vect_internal_def
:
1477 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1479 /* Get the def from the vectorized stmt. */
1480 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1482 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1483 /* Get vectorized pattern statement. */
1485 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1486 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1487 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1488 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1489 gcc_assert (vec_stmt
);
1490 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1491 vec_oprnd
= PHI_RESULT (vec_stmt
);
1492 else if (is_gimple_call (vec_stmt
))
1493 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1495 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1499 /* Case 4: operand is defined by a loop header phi - reduction */
1500 case vect_reduction_def
:
1501 case vect_double_reduction_def
:
1502 case vect_nested_cycle
:
1506 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1507 loop
= (gimple_bb (def_stmt
))->loop_father
;
1509 /* Get the def before the loop */
1510 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1511 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1514 /* Case 5: operand is defined by loop-header phi - induction. */
1515 case vect_induction_def
:
1517 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1519 /* Get the def from the vectorized stmt. */
1520 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1521 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1522 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1523 vec_oprnd
= PHI_RESULT (vec_stmt
);
1525 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1535 /* Function vect_get_vec_def_for_stmt_copy
1537 Return a vector-def for an operand. This function is used when the
1538 vectorized stmt to be created (by the caller to this function) is a "copy"
1539 created in case the vectorized result cannot fit in one vector, and several
1540 copies of the vector-stmt are required. In this case the vector-def is
1541 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1542 of the stmt that defines VEC_OPRND.
1543 DT is the type of the vector def VEC_OPRND.
1546 In case the vectorization factor (VF) is bigger than the number
1547 of elements that can fit in a vectype (nunits), we have to generate
1548 more than one vector stmt to vectorize the scalar stmt. This situation
1549 arises when there are multiple data-types operated upon in the loop; the
1550 smallest data-type determines the VF, and as a result, when vectorizing
1551 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1552 vector stmt (each computing a vector of 'nunits' results, and together
1553 computing 'VF' results in each iteration). This function is called when
1554 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1555 which VF=16 and nunits=4, so the number of copies required is 4):
1557 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1559 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1560 VS1.1: vx.1 = memref1 VS1.2
1561 VS1.2: vx.2 = memref2 VS1.3
1562 VS1.3: vx.3 = memref3
1564 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1565 VSnew.1: vz1 = vx.1 + ... VSnew.2
1566 VSnew.2: vz2 = vx.2 + ... VSnew.3
1567 VSnew.3: vz3 = vx.3 + ...
1569 The vectorization of S1 is explained in vectorizable_load.
1570 The vectorization of S2:
1571 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1572 the function 'vect_get_vec_def_for_operand' is called to
1573 get the relevant vector-def for each operand of S2. For operand x it
1574 returns the vector-def 'vx.0'.
1576 To create the remaining copies of the vector-stmt (VSnew.j), this
1577 function is called to get the relevant vector-def for each operand. It is
1578 obtained from the respective VS1.j stmt, which is recorded in the
1579 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1581 For example, to obtain the vector-def 'vx.1' in order to create the
1582 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1583 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1584 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1585 and return its def ('vx.1').
1586 Overall, to create the above sequence this function will be called 3 times:
1587 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1588 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1589 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1592 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1594 gimple vec_stmt_for_operand
;
1595 stmt_vec_info def_stmt_info
;
1597 /* Do nothing; can reuse same def. */
1598 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1601 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1602 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1603 gcc_assert (def_stmt_info
);
1604 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1605 gcc_assert (vec_stmt_for_operand
);
1606 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1607 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1608 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1610 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1615 /* Get vectorized definitions for the operands to create a copy of an original
1616 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1619 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1620 vec
<tree
> *vec_oprnds0
,
1621 vec
<tree
> *vec_oprnds1
)
1623 tree vec_oprnd
= vec_oprnds0
->pop ();
1625 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1626 vec_oprnds0
->quick_push (vec_oprnd
);
1628 if (vec_oprnds1
&& vec_oprnds1
->length ())
1630 vec_oprnd
= vec_oprnds1
->pop ();
1631 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1632 vec_oprnds1
->quick_push (vec_oprnd
);
1637 /* Get vectorized definitions for OP0 and OP1.
1638 REDUC_INDEX is the index of reduction operand in case of reduction,
1639 and -1 otherwise. */
1642 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1643 vec
<tree
> *vec_oprnds0
,
1644 vec
<tree
> *vec_oprnds1
,
1645 slp_tree slp_node
, int reduc_index
)
1649 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1650 auto_vec
<tree
> ops (nops
);
1651 auto_vec
<vec
<tree
> > vec_defs (nops
);
1653 ops
.quick_push (op0
);
1655 ops
.quick_push (op1
);
1657 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1659 *vec_oprnds0
= vec_defs
[0];
1661 *vec_oprnds1
= vec_defs
[1];
1667 vec_oprnds0
->create (1);
1668 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1669 vec_oprnds0
->quick_push (vec_oprnd
);
1673 vec_oprnds1
->create (1);
1674 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1675 vec_oprnds1
->quick_push (vec_oprnd
);
1681 /* Function vect_finish_stmt_generation.
1683 Insert a new stmt. */
1686 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1687 gimple_stmt_iterator
*gsi
)
1689 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1690 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1691 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1693 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1695 if (!gsi_end_p (*gsi
)
1696 && gimple_has_mem_ops (vec_stmt
))
1698 gimple at_stmt
= gsi_stmt (*gsi
);
1699 tree vuse
= gimple_vuse (at_stmt
);
1700 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1702 tree vdef
= gimple_vdef (at_stmt
);
1703 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1704 /* If we have an SSA vuse and insert a store, update virtual
1705 SSA form to avoid triggering the renamer. Do so only
1706 if we can easily see all uses - which is what almost always
1707 happens with the way vectorized stmts are inserted. */
1708 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1709 && ((is_gimple_assign (vec_stmt
)
1710 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1711 || (is_gimple_call (vec_stmt
)
1712 && !(gimple_call_flags (vec_stmt
)
1713 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1715 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1716 gimple_set_vdef (vec_stmt
, new_vdef
);
1717 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1721 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1723 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1726 if (dump_enabled_p ())
1728 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1729 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1732 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1734 /* While EH edges will generally prevent vectorization, stmt might
1735 e.g. be in a must-not-throw region. Ensure newly created stmts
1736 that could throw are part of the same region. */
1737 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1738 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1739 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1742 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1743 a function declaration if the target has a vectorized version
1744 of the function, or NULL_TREE if the function cannot be vectorized. */
1747 vectorizable_function (gcall
*call
, tree vectype_out
, tree vectype_in
)
1749 tree fndecl
= gimple_call_fndecl (call
);
1751 /* We only handle functions that do not read or clobber memory -- i.e.
1752 const or novops ones. */
1753 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1757 || TREE_CODE (fndecl
) != FUNCTION_DECL
1758 || !DECL_BUILT_IN (fndecl
))
1761 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
1766 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
1767 gimple_stmt_iterator
*);
1770 /* Function vectorizable_mask_load_store.
1772 Check if STMT performs a conditional load or store that can be vectorized.
1773 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1774 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1775 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1778 vectorizable_mask_load_store (gimple stmt
, gimple_stmt_iterator
*gsi
,
1779 gimple
*vec_stmt
, slp_tree slp_node
)
1781 tree vec_dest
= NULL
;
1782 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1783 stmt_vec_info prev_stmt_info
;
1784 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1785 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1786 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1787 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1788 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1792 tree dataref_ptr
= NULL_TREE
;
1794 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1798 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1799 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1800 int gather_scale
= 1;
1801 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1806 enum vect_def_type dt
;
1808 if (slp_node
!= NULL
)
1811 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1812 gcc_assert (ncopies
>= 1);
1814 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1815 mask
= gimple_call_arg (stmt
, 2);
1816 if (TYPE_PRECISION (TREE_TYPE (mask
))
1817 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))))
1820 /* FORNOW. This restriction should be relaxed. */
1821 if (nested_in_vect_loop
&& ncopies
> 1)
1823 if (dump_enabled_p ())
1824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1825 "multiple types in nested loop.");
1829 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1832 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1835 if (!STMT_VINFO_DATA_REF (stmt_info
))
1838 elem_type
= TREE_TYPE (vectype
);
1840 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1843 if (STMT_VINFO_STRIDED_P (stmt_info
))
1846 if (STMT_VINFO_GATHER_P (stmt_info
))
1850 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
1851 &gather_off
, &gather_scale
);
1852 gcc_assert (gather_decl
);
1853 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, NULL
,
1854 &def_stmt
, &def
, &gather_dt
,
1855 &gather_off_vectype
))
1857 if (dump_enabled_p ())
1858 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1859 "gather index use not simple.");
1863 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1865 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1866 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1868 if (dump_enabled_p ())
1869 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1870 "masked gather with integer mask not supported.");
1874 else if (tree_int_cst_compare (nested_in_vect_loop
1875 ? STMT_VINFO_DR_STEP (stmt_info
)
1876 : DR_STEP (dr
), size_zero_node
) <= 0)
1878 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1879 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
), !is_store
))
1882 if (TREE_CODE (mask
) != SSA_NAME
)
1885 if (!vect_is_simple_use (mask
, stmt
, loop_vinfo
, NULL
,
1886 &def_stmt
, &def
, &dt
))
1891 tree rhs
= gimple_call_arg (stmt
, 3);
1892 if (!vect_is_simple_use (rhs
, stmt
, loop_vinfo
, NULL
,
1893 &def_stmt
, &def
, &dt
))
1897 if (!vec_stmt
) /* transformation not required. */
1899 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1901 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1904 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
1910 if (STMT_VINFO_GATHER_P (stmt_info
))
1912 tree vec_oprnd0
= NULL_TREE
, op
;
1913 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1914 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1915 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1916 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1917 tree mask_perm_mask
= NULL_TREE
;
1918 edge pe
= loop_preheader_edge (loop
);
1921 enum { NARROW
, NONE
, WIDEN
} modifier
;
1922 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1924 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1925 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1926 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1927 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1928 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1929 scaletype
= TREE_VALUE (arglist
);
1930 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1931 && types_compatible_p (srctype
, masktype
));
1933 if (nunits
== gather_off_nunits
)
1935 else if (nunits
== gather_off_nunits
/ 2)
1937 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1940 for (i
= 0; i
< gather_off_nunits
; ++i
)
1941 sel
[i
] = i
| nunits
;
1943 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
1945 else if (nunits
== gather_off_nunits
* 2)
1947 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1950 for (i
= 0; i
< nunits
; ++i
)
1951 sel
[i
] = i
< gather_off_nunits
1952 ? i
: i
+ nunits
- gather_off_nunits
;
1954 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
1956 for (i
= 0; i
< nunits
; ++i
)
1957 sel
[i
] = i
| gather_off_nunits
;
1958 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
1963 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
1965 ptr
= fold_convert (ptrtype
, gather_base
);
1966 if (!is_gimple_min_invariant (ptr
))
1968 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1969 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1970 gcc_assert (!new_bb
);
1973 scale
= build_int_cst (scaletype
, gather_scale
);
1975 prev_stmt_info
= NULL
;
1976 for (j
= 0; j
< ncopies
; ++j
)
1978 if (modifier
== WIDEN
&& (j
& 1))
1979 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1980 perm_mask
, stmt
, gsi
);
1983 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
1986 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
1988 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1990 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1991 == TYPE_VECTOR_SUBPARTS (idxtype
));
1992 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
1993 var
= make_ssa_name (var
);
1994 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1996 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1997 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2001 if (mask_perm_mask
&& (j
& 1))
2002 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2003 mask_perm_mask
, stmt
, gsi
);
2007 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2010 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
,
2011 &def_stmt
, &def
, &dt
);
2012 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2016 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2018 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2019 == TYPE_VECTOR_SUBPARTS (masktype
));
2020 var
= vect_get_new_vect_var (masktype
, vect_simple_var
,
2022 var
= make_ssa_name (var
);
2023 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2025 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2026 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2032 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
2035 if (!useless_type_conversion_p (vectype
, rettype
))
2037 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2038 == TYPE_VECTOR_SUBPARTS (rettype
));
2039 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
2040 op
= make_ssa_name (var
, new_stmt
);
2041 gimple_call_set_lhs (new_stmt
, op
);
2042 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2043 var
= make_ssa_name (vec_dest
);
2044 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2045 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2049 var
= make_ssa_name (vec_dest
, new_stmt
);
2050 gimple_call_set_lhs (new_stmt
, var
);
2053 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2055 if (modifier
== NARROW
)
2062 var
= permute_vec_elements (prev_res
, var
,
2063 perm_mask
, stmt
, gsi
);
2064 new_stmt
= SSA_NAME_DEF_STMT (var
);
2067 if (prev_stmt_info
== NULL
)
2068 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2070 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2071 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2074 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2076 tree lhs
= gimple_call_lhs (stmt
);
2077 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2078 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2079 set_vinfo_for_stmt (stmt
, NULL
);
2080 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2081 gsi_replace (gsi
, new_stmt
, true);
2086 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2087 prev_stmt_info
= NULL
;
2088 for (i
= 0; i
< ncopies
; i
++)
2090 unsigned align
, misalign
;
2094 tree rhs
= gimple_call_arg (stmt
, 3);
2095 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
, NULL
);
2096 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2097 /* We should have catched mismatched types earlier. */
2098 gcc_assert (useless_type_conversion_p (vectype
,
2099 TREE_TYPE (vec_rhs
)));
2100 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2101 NULL_TREE
, &dummy
, gsi
,
2102 &ptr_incr
, false, &inv_p
);
2103 gcc_assert (!inv_p
);
2107 vect_is_simple_use (vec_rhs
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2109 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2110 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2112 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2113 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2114 TYPE_SIZE_UNIT (vectype
));
2117 align
= TYPE_ALIGN_UNIT (vectype
);
2118 if (aligned_access_p (dr
))
2120 else if (DR_MISALIGNMENT (dr
) == -1)
2122 align
= TYPE_ALIGN_UNIT (elem_type
);
2126 misalign
= DR_MISALIGNMENT (dr
);
2127 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2130 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2131 gimple_call_arg (stmt
, 1),
2133 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2135 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2137 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2138 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2143 tree vec_mask
= NULL_TREE
;
2144 prev_stmt_info
= NULL
;
2145 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2146 for (i
= 0; i
< ncopies
; i
++)
2148 unsigned align
, misalign
;
2152 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2153 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2154 NULL_TREE
, &dummy
, gsi
,
2155 &ptr_incr
, false, &inv_p
);
2156 gcc_assert (!inv_p
);
2160 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2162 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2163 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2164 TYPE_SIZE_UNIT (vectype
));
2167 align
= TYPE_ALIGN_UNIT (vectype
);
2168 if (aligned_access_p (dr
))
2170 else if (DR_MISALIGNMENT (dr
) == -1)
2172 align
= TYPE_ALIGN_UNIT (elem_type
);
2176 misalign
= DR_MISALIGNMENT (dr
);
2177 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2180 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2181 gimple_call_arg (stmt
, 1),
2183 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2184 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2186 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2188 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2189 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2195 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2197 tree lhs
= gimple_call_lhs (stmt
);
2198 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2199 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2200 set_vinfo_for_stmt (stmt
, NULL
);
2201 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2202 gsi_replace (gsi
, new_stmt
, true);
2209 /* Function vectorizable_call.
2211 Check if GS performs a function call that can be vectorized.
2212 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2213 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2214 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2217 vectorizable_call (gimple gs
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
2224 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2225 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2226 tree vectype_out
, vectype_in
;
2229 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2230 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2231 tree fndecl
, new_temp
, def
, rhs_type
;
2233 enum vect_def_type dt
[3]
2234 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2235 gimple new_stmt
= NULL
;
2237 vec
<tree
> vargs
= vNULL
;
2238 enum { NARROW
, NONE
, WIDEN
} modifier
;
2242 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2245 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2248 /* Is GS a vectorizable call? */
2249 stmt
= dyn_cast
<gcall
*> (gs
);
2253 if (gimple_call_internal_p (stmt
)
2254 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2255 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2256 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2259 if (gimple_call_lhs (stmt
) == NULL_TREE
2260 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2263 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2265 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2267 /* Process function arguments. */
2268 rhs_type
= NULL_TREE
;
2269 vectype_in
= NULL_TREE
;
2270 nargs
= gimple_call_num_args (stmt
);
2272 /* Bail out if the function has more than three arguments, we do not have
2273 interesting builtin functions to vectorize with more than two arguments
2274 except for fma. No arguments is also not good. */
2275 if (nargs
== 0 || nargs
> 3)
2278 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2279 if (gimple_call_internal_p (stmt
)
2280 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2283 rhs_type
= unsigned_type_node
;
2286 for (i
= 0; i
< nargs
; i
++)
2290 op
= gimple_call_arg (stmt
, i
);
2292 /* We can only handle calls with arguments of the same type. */
2294 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2298 "argument types differ.\n");
2302 rhs_type
= TREE_TYPE (op
);
2304 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2305 &def_stmt
, &def
, &dt
[i
], &opvectype
))
2307 if (dump_enabled_p ())
2308 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2309 "use not simple.\n");
2314 vectype_in
= opvectype
;
2316 && opvectype
!= vectype_in
)
2318 if (dump_enabled_p ())
2319 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2320 "argument vector types differ.\n");
2324 /* If all arguments are external or constant defs use a vector type with
2325 the same size as the output vector type. */
2327 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2329 gcc_assert (vectype_in
);
2332 if (dump_enabled_p ())
2334 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2335 "no vectype for scalar type ");
2336 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2337 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2344 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2345 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2346 if (nunits_in
== nunits_out
/ 2)
2348 else if (nunits_out
== nunits_in
)
2350 else if (nunits_out
== nunits_in
/ 2)
2355 /* For now, we only vectorize functions if a target specific builtin
2356 is available. TODO -- in some cases, it might be profitable to
2357 insert the calls for pieces of the vector, in order to be able
2358 to vectorize other operations in the loop. */
2359 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
2360 if (fndecl
== NULL_TREE
)
2362 if (gimple_call_internal_p (stmt
)
2363 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
2366 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2367 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2368 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2369 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2371 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2372 { 0, 1, 2, ... vf - 1 } vector. */
2373 gcc_assert (nargs
== 0);
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2379 "function is not vectorizable.\n");
2384 gcc_assert (!gimple_vuse (stmt
));
2386 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2388 else if (modifier
== NARROW
)
2389 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2391 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2393 /* Sanity check: make sure that at least one copy of the vectorized stmt
2394 needs to be generated. */
2395 gcc_assert (ncopies
>= 1);
2397 if (!vec_stmt
) /* transformation not required. */
2399 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2400 if (dump_enabled_p ())
2401 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2403 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2413 scalar_dest
= gimple_call_lhs (stmt
);
2414 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2416 prev_stmt_info
= NULL
;
2420 for (j
= 0; j
< ncopies
; ++j
)
2422 /* Build argument list for the vectorized call. */
2424 vargs
.create (nargs
);
2430 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2431 vec
<tree
> vec_oprnds0
;
2433 for (i
= 0; i
< nargs
; i
++)
2434 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2435 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2436 vec_oprnds0
= vec_defs
[0];
2438 /* Arguments are ready. Create the new vector stmt. */
2439 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2442 for (k
= 0; k
< nargs
; k
++)
2444 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2445 vargs
[k
] = vec_oprndsk
[i
];
2447 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2448 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2449 gimple_call_set_lhs (new_stmt
, new_temp
);
2450 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2451 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2454 for (i
= 0; i
< nargs
; i
++)
2456 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2457 vec_oprndsi
.release ();
2462 for (i
= 0; i
< nargs
; i
++)
2464 op
= gimple_call_arg (stmt
, i
);
2467 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2470 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2472 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2475 vargs
.quick_push (vec_oprnd0
);
2478 if (gimple_call_internal_p (stmt
)
2479 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2481 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2483 for (k
= 0; k
< nunits_out
; ++k
)
2484 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2485 tree cst
= build_vector (vectype_out
, v
);
2487 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
2488 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
2489 new_temp
= make_ssa_name (new_var
, init_stmt
);
2490 gimple_assign_set_lhs (init_stmt
, new_temp
);
2491 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2492 new_temp
= make_ssa_name (vec_dest
);
2493 new_stmt
= gimple_build_assign (new_temp
,
2494 gimple_assign_lhs (init_stmt
));
2498 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2499 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2500 gimple_call_set_lhs (new_stmt
, new_temp
);
2502 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2505 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2507 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2509 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2515 for (j
= 0; j
< ncopies
; ++j
)
2517 /* Build argument list for the vectorized call. */
2519 vargs
.create (nargs
* 2);
2525 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2526 vec
<tree
> vec_oprnds0
;
2528 for (i
= 0; i
< nargs
; i
++)
2529 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2530 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2531 vec_oprnds0
= vec_defs
[0];
2533 /* Arguments are ready. Create the new vector stmt. */
2534 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2538 for (k
= 0; k
< nargs
; k
++)
2540 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2541 vargs
.quick_push (vec_oprndsk
[i
]);
2542 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2544 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2545 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2546 gimple_call_set_lhs (new_stmt
, new_temp
);
2547 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2548 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2551 for (i
= 0; i
< nargs
; i
++)
2553 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2554 vec_oprndsi
.release ();
2559 for (i
= 0; i
< nargs
; i
++)
2561 op
= gimple_call_arg (stmt
, i
);
2565 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2567 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2571 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2573 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2575 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2578 vargs
.quick_push (vec_oprnd0
);
2579 vargs
.quick_push (vec_oprnd1
);
2582 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2583 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2584 gimple_call_set_lhs (new_stmt
, new_temp
);
2585 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2588 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2590 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2592 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2595 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2600 /* No current target implements this case. */
2606 /* The call in STMT might prevent it from being removed in dce.
2607 We however cannot remove it here, due to the way the ssa name
2608 it defines is mapped to the new definition. So just replace
2609 rhs of the statement with something harmless. */
2614 type
= TREE_TYPE (scalar_dest
);
2615 if (is_pattern_stmt_p (stmt_info
))
2616 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2618 lhs
= gimple_call_lhs (stmt
);
2619 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2620 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2621 set_vinfo_for_stmt (stmt
, NULL
);
2622 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2623 gsi_replace (gsi
, new_stmt
, false);
2629 struct simd_call_arg_info
2633 enum vect_def_type dt
;
2634 HOST_WIDE_INT linear_step
;
2638 /* Function vectorizable_simd_clone_call.
2640 Check if STMT performs a function call that can be vectorized
2641 by calling a simd clone of the function.
2642 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2643 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2644 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2647 vectorizable_simd_clone_call (gimple stmt
, gimple_stmt_iterator
*gsi
,
2648 gimple
*vec_stmt
, slp_tree slp_node
)
2653 tree vec_oprnd0
= NULL_TREE
;
2654 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2656 unsigned int nunits
;
2657 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2658 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2659 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2660 tree fndecl
, new_temp
, def
;
2662 gimple new_stmt
= NULL
;
2664 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2665 vec
<tree
> vargs
= vNULL
;
2667 tree lhs
, rtype
, ratype
;
2668 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2670 /* Is STMT a vectorizable call? */
2671 if (!is_gimple_call (stmt
))
2674 fndecl
= gimple_call_fndecl (stmt
);
2675 if (fndecl
== NULL_TREE
)
2678 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2679 if (node
== NULL
|| node
->simd_clones
== NULL
)
2682 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2685 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2688 if (gimple_call_lhs (stmt
)
2689 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2692 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2694 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2696 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2700 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2703 /* Process function arguments. */
2704 nargs
= gimple_call_num_args (stmt
);
2706 /* Bail out if the function has zero arguments. */
2710 arginfo
.create (nargs
);
2712 for (i
= 0; i
< nargs
; i
++)
2714 simd_call_arg_info thisarginfo
;
2717 thisarginfo
.linear_step
= 0;
2718 thisarginfo
.align
= 0;
2719 thisarginfo
.op
= NULL_TREE
;
2721 op
= gimple_call_arg (stmt
, i
);
2722 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2723 &def_stmt
, &def
, &thisarginfo
.dt
,
2724 &thisarginfo
.vectype
)
2725 || thisarginfo
.dt
== vect_uninitialized_def
)
2727 if (dump_enabled_p ())
2728 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2729 "use not simple.\n");
2734 if (thisarginfo
.dt
== vect_constant_def
2735 || thisarginfo
.dt
== vect_external_def
)
2736 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2738 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2740 /* For linear arguments, the analyze phase should have saved
2741 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2742 if (i
* 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
2743 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2])
2745 gcc_assert (vec_stmt
);
2746 thisarginfo
.linear_step
2747 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2]);
2749 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 1];
2750 /* If loop has been peeled for alignment, we need to adjust it. */
2751 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
2752 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
2755 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
2756 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2];
2757 tree opt
= TREE_TYPE (thisarginfo
.op
);
2758 bias
= fold_convert (TREE_TYPE (step
), bias
);
2759 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
2761 = fold_build2 (POINTER_TYPE_P (opt
)
2762 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
2763 thisarginfo
.op
, bias
);
2767 && thisarginfo
.dt
!= vect_constant_def
2768 && thisarginfo
.dt
!= vect_external_def
2770 && TREE_CODE (op
) == SSA_NAME
2771 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
2773 && tree_fits_shwi_p (iv
.step
))
2775 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2776 thisarginfo
.op
= iv
.base
;
2778 else if ((thisarginfo
.dt
== vect_constant_def
2779 || thisarginfo
.dt
== vect_external_def
)
2780 && POINTER_TYPE_P (TREE_TYPE (op
)))
2781 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2783 arginfo
.quick_push (thisarginfo
);
2786 unsigned int badness
= 0;
2787 struct cgraph_node
*bestn
= NULL
;
2788 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
2789 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
2791 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2792 n
= n
->simdclone
->next_clone
)
2794 unsigned int this_badness
= 0;
2795 if (n
->simdclone
->simdlen
2796 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2797 || n
->simdclone
->nargs
!= nargs
)
2799 if (n
->simdclone
->simdlen
2800 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2801 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2802 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2803 if (n
->simdclone
->inbranch
)
2804 this_badness
+= 2048;
2805 int target_badness
= targetm
.simd_clone
.usable (n
);
2806 if (target_badness
< 0)
2808 this_badness
+= target_badness
* 512;
2809 /* FORNOW: Have to add code to add the mask argument. */
2810 if (n
->simdclone
->inbranch
)
2812 for (i
= 0; i
< nargs
; i
++)
2814 switch (n
->simdclone
->args
[i
].arg_type
)
2816 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2817 if (!useless_type_conversion_p
2818 (n
->simdclone
->args
[i
].orig_type
,
2819 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2821 else if (arginfo
[i
].dt
== vect_constant_def
2822 || arginfo
[i
].dt
== vect_external_def
2823 || arginfo
[i
].linear_step
)
2826 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2827 if (arginfo
[i
].dt
!= vect_constant_def
2828 && arginfo
[i
].dt
!= vect_external_def
)
2831 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2832 if (arginfo
[i
].dt
== vect_constant_def
2833 || arginfo
[i
].dt
== vect_external_def
2834 || (arginfo
[i
].linear_step
2835 != n
->simdclone
->args
[i
].linear_step
))
2838 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2842 case SIMD_CLONE_ARG_TYPE_MASK
:
2845 if (i
== (size_t) -1)
2847 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2852 if (arginfo
[i
].align
)
2853 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2854 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2856 if (i
== (size_t) -1)
2858 if (bestn
== NULL
|| this_badness
< badness
)
2861 badness
= this_badness
;
2871 for (i
= 0; i
< nargs
; i
++)
2872 if ((arginfo
[i
].dt
== vect_constant_def
2873 || arginfo
[i
].dt
== vect_external_def
)
2874 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
2877 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
2879 if (arginfo
[i
].vectype
== NULL
2880 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2881 > bestn
->simdclone
->simdlen
))
2888 fndecl
= bestn
->decl
;
2889 nunits
= bestn
->simdclone
->simdlen
;
2890 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2892 /* If the function isn't const, only allow it in simd loops where user
2893 has asserted that at least nunits consecutive iterations can be
2894 performed using SIMD instructions. */
2895 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
2896 && gimple_vuse (stmt
))
2902 /* Sanity check: make sure that at least one copy of the vectorized stmt
2903 needs to be generated. */
2904 gcc_assert (ncopies
>= 1);
2906 if (!vec_stmt
) /* transformation not required. */
2908 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
2909 for (i
= 0; i
< nargs
; i
++)
2910 if (bestn
->simdclone
->args
[i
].arg_type
2911 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
2913 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 2
2915 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
2916 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
2917 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
2918 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
2919 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
2921 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
2922 if (dump_enabled_p ())
2923 dump_printf_loc (MSG_NOTE
, vect_location
,
2924 "=== vectorizable_simd_clone_call ===\n");
2925 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2932 if (dump_enabled_p ())
2933 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2936 scalar_dest
= gimple_call_lhs (stmt
);
2937 vec_dest
= NULL_TREE
;
2942 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2943 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
2944 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
2947 rtype
= TREE_TYPE (ratype
);
2951 prev_stmt_info
= NULL
;
2952 for (j
= 0; j
< ncopies
; ++j
)
2954 /* Build argument list for the vectorized call. */
2956 vargs
.create (nargs
);
2960 for (i
= 0; i
< nargs
; i
++)
2962 unsigned int k
, l
, m
, o
;
2964 op
= gimple_call_arg (stmt
, i
);
2965 switch (bestn
->simdclone
->args
[i
].arg_type
)
2967 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2968 atype
= bestn
->simdclone
->args
[i
].vector_type
;
2969 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
2970 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
2972 if (TYPE_VECTOR_SUBPARTS (atype
)
2973 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
2975 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
2976 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2977 / TYPE_VECTOR_SUBPARTS (atype
));
2978 gcc_assert ((k
& (k
- 1)) == 0);
2981 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2984 vec_oprnd0
= arginfo
[i
].op
;
2985 if ((m
& (k
- 1)) == 0)
2987 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
2990 arginfo
[i
].op
= vec_oprnd0
;
2992 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
2994 bitsize_int ((m
& (k
- 1)) * prec
));
2996 = gimple_build_assign (make_ssa_name (atype
),
2998 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2999 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3003 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3004 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3005 gcc_assert ((k
& (k
- 1)) == 0);
3006 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3008 vec_alloc (ctor_elts
, k
);
3011 for (l
= 0; l
< k
; l
++)
3013 if (m
== 0 && l
== 0)
3015 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
3018 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3020 arginfo
[i
].op
= vec_oprnd0
;
3023 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3027 vargs
.safe_push (vec_oprnd0
);
3030 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3032 = gimple_build_assign (make_ssa_name (atype
),
3034 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3035 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3040 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3041 vargs
.safe_push (op
);
3043 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3048 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3053 edge pe
= loop_preheader_edge (loop
);
3054 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3055 gcc_assert (!new_bb
);
3057 tree phi_res
= copy_ssa_name (op
);
3058 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3059 set_vinfo_for_stmt (new_phi
,
3060 new_stmt_vec_info (new_phi
, loop_vinfo
,
3062 add_phi_arg (new_phi
, arginfo
[i
].op
,
3063 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3065 = POINTER_TYPE_P (TREE_TYPE (op
))
3066 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3067 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3068 ? sizetype
: TREE_TYPE (op
);
3070 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3072 tree tcst
= wide_int_to_tree (type
, cst
);
3073 tree phi_arg
= copy_ssa_name (op
);
3075 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3076 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3077 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3078 set_vinfo_for_stmt (new_stmt
,
3079 new_stmt_vec_info (new_stmt
, loop_vinfo
,
3081 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3083 arginfo
[i
].op
= phi_res
;
3084 vargs
.safe_push (phi_res
);
3089 = POINTER_TYPE_P (TREE_TYPE (op
))
3090 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3091 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3092 ? sizetype
: TREE_TYPE (op
);
3094 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3096 tree tcst
= wide_int_to_tree (type
, cst
);
3097 new_temp
= make_ssa_name (TREE_TYPE (op
));
3098 new_stmt
= gimple_build_assign (new_temp
, code
,
3099 arginfo
[i
].op
, tcst
);
3100 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3101 vargs
.safe_push (new_temp
);
3104 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3110 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3113 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3115 new_temp
= create_tmp_var (ratype
);
3116 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3117 == TYPE_VECTOR_SUBPARTS (rtype
))
3118 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3120 new_temp
= make_ssa_name (rtype
, new_stmt
);
3121 gimple_call_set_lhs (new_stmt
, new_temp
);
3123 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3127 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3130 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3131 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3132 gcc_assert ((k
& (k
- 1)) == 0);
3133 for (l
= 0; l
< k
; l
++)
3138 t
= build_fold_addr_expr (new_temp
);
3139 t
= build2 (MEM_REF
, vectype
, t
,
3140 build_int_cst (TREE_TYPE (t
),
3141 l
* prec
/ BITS_PER_UNIT
));
3144 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3145 size_int (prec
), bitsize_int (l
* prec
));
3147 = gimple_build_assign (make_ssa_name (vectype
), t
);
3148 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3149 if (j
== 0 && l
== 0)
3150 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3152 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3154 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3159 tree clobber
= build_constructor (ratype
, NULL
);
3160 TREE_THIS_VOLATILE (clobber
) = 1;
3161 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3162 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3166 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3168 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3169 / TYPE_VECTOR_SUBPARTS (rtype
));
3170 gcc_assert ((k
& (k
- 1)) == 0);
3171 if ((j
& (k
- 1)) == 0)
3172 vec_alloc (ret_ctor_elts
, k
);
3175 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3176 for (m
= 0; m
< o
; m
++)
3178 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3179 size_int (m
), NULL_TREE
, NULL_TREE
);
3181 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3182 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3183 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3184 gimple_assign_lhs (new_stmt
));
3186 tree clobber
= build_constructor (ratype
, NULL
);
3187 TREE_THIS_VOLATILE (clobber
) = 1;
3188 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3189 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3192 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3193 if ((j
& (k
- 1)) != k
- 1)
3195 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3197 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3198 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3200 if ((unsigned) j
== k
- 1)
3201 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3203 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3205 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3210 tree t
= build_fold_addr_expr (new_temp
);
3211 t
= build2 (MEM_REF
, vectype
, t
,
3212 build_int_cst (TREE_TYPE (t
), 0));
3214 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3215 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3216 tree clobber
= build_constructor (ratype
, NULL
);
3217 TREE_THIS_VOLATILE (clobber
) = 1;
3218 vect_finish_stmt_generation (stmt
,
3219 gimple_build_assign (new_temp
,
3225 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3227 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3229 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3234 /* The call in STMT might prevent it from being removed in dce.
3235 We however cannot remove it here, due to the way the ssa name
3236 it defines is mapped to the new definition. So just replace
3237 rhs of the statement with something harmless. */
3244 type
= TREE_TYPE (scalar_dest
);
3245 if (is_pattern_stmt_p (stmt_info
))
3246 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3248 lhs
= gimple_call_lhs (stmt
);
3249 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3252 new_stmt
= gimple_build_nop ();
3253 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3254 set_vinfo_for_stmt (stmt
, NULL
);
3255 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3256 gsi_replace (gsi
, new_stmt
, true);
3257 unlink_stmt_vdef (stmt
);
3263 /* Function vect_gen_widened_results_half
3265 Create a vector stmt whose code, type, number of arguments, and result
3266 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3267 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3268 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3269 needs to be created (DECL is a function-decl of a target-builtin).
3270 STMT is the original scalar stmt that we are vectorizing. */
3273 vect_gen_widened_results_half (enum tree_code code
,
3275 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3276 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3282 /* Generate half of the widened result: */
3283 if (code
== CALL_EXPR
)
3285 /* Target specific support */
3286 if (op_type
== binary_op
)
3287 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3289 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3290 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3291 gimple_call_set_lhs (new_stmt
, new_temp
);
3295 /* Generic support */
3296 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3297 if (op_type
!= binary_op
)
3299 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3300 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3301 gimple_assign_set_lhs (new_stmt
, new_temp
);
3303 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3309 /* Get vectorized definitions for loop-based vectorization. For the first
3310 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3311 scalar operand), and for the rest we get a copy with
3312 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3313 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3314 The vectors are collected into VEC_OPRNDS. */
3317 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
3318 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3322 /* Get first vector operand. */
3323 /* All the vector operands except the very first one (that is scalar oprnd)
3325 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3326 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
3328 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3330 vec_oprnds
->quick_push (vec_oprnd
);
3332 /* Get second vector operand. */
3333 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3334 vec_oprnds
->quick_push (vec_oprnd
);
3338 /* For conversion in multiple steps, continue to get operands
3341 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3345 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3346 For multi-step conversions store the resulting vectors and call the function
3350 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3351 int multi_step_cvt
, gimple stmt
,
3353 gimple_stmt_iterator
*gsi
,
3354 slp_tree slp_node
, enum tree_code code
,
3355 stmt_vec_info
*prev_stmt_info
)
3358 tree vop0
, vop1
, new_tmp
, vec_dest
;
3360 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3362 vec_dest
= vec_dsts
.pop ();
3364 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3366 /* Create demotion operation. */
3367 vop0
= (*vec_oprnds
)[i
];
3368 vop1
= (*vec_oprnds
)[i
+ 1];
3369 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3370 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3371 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3372 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3375 /* Store the resulting vector for next recursive call. */
3376 (*vec_oprnds
)[i
/2] = new_tmp
;
3379 /* This is the last step of the conversion sequence. Store the
3380 vectors in SLP_NODE or in vector info of the scalar statement
3381 (or in STMT_VINFO_RELATED_STMT chain). */
3383 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3385 if (!*prev_stmt_info
)
3386 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3388 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3390 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3394 /* For multi-step demotion operations we first generate demotion operations
3395 from the source type to the intermediate types, and then combine the
3396 results (stored in VEC_OPRNDS) in demotion operation to the destination
3400 /* At each level of recursion we have half of the operands we had at the
3402 vec_oprnds
->truncate ((i
+1)/2);
3403 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3404 stmt
, vec_dsts
, gsi
, slp_node
,
3405 VEC_PACK_TRUNC_EXPR
,
3409 vec_dsts
.quick_push (vec_dest
);
3413 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3414 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3415 the resulting vectors and call the function recursively. */
3418 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3419 vec
<tree
> *vec_oprnds1
,
3420 gimple stmt
, tree vec_dest
,
3421 gimple_stmt_iterator
*gsi
,
3422 enum tree_code code1
,
3423 enum tree_code code2
, tree decl1
,
3424 tree decl2
, int op_type
)
3427 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3428 gimple new_stmt1
, new_stmt2
;
3429 vec
<tree
> vec_tmp
= vNULL
;
3431 vec_tmp
.create (vec_oprnds0
->length () * 2);
3432 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3434 if (op_type
== binary_op
)
3435 vop1
= (*vec_oprnds1
)[i
];
3439 /* Generate the two halves of promotion operation. */
3440 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3441 op_type
, vec_dest
, gsi
, stmt
);
3442 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3443 op_type
, vec_dest
, gsi
, stmt
);
3444 if (is_gimple_call (new_stmt1
))
3446 new_tmp1
= gimple_call_lhs (new_stmt1
);
3447 new_tmp2
= gimple_call_lhs (new_stmt2
);
3451 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3452 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3455 /* Store the results for the next step. */
3456 vec_tmp
.quick_push (new_tmp1
);
3457 vec_tmp
.quick_push (new_tmp2
);
3460 vec_oprnds0
->release ();
3461 *vec_oprnds0
= vec_tmp
;
3465 /* Check if STMT performs a conversion operation, that can be vectorized.
3466 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3467 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3468 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3471 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
3472 gimple
*vec_stmt
, slp_tree slp_node
)
3476 tree op0
, op1
= NULL_TREE
;
3477 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3478 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3479 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3480 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3481 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3482 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3486 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3487 gimple new_stmt
= NULL
;
3488 stmt_vec_info prev_stmt_info
;
3491 tree vectype_out
, vectype_in
;
3493 tree lhs_type
, rhs_type
;
3494 enum { NARROW
, NONE
, WIDEN
} modifier
;
3495 vec
<tree
> vec_oprnds0
= vNULL
;
3496 vec
<tree
> vec_oprnds1
= vNULL
;
3498 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3499 int multi_step_cvt
= 0;
3500 vec
<tree
> vec_dsts
= vNULL
;
3501 vec
<tree
> interm_types
= vNULL
;
3502 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3504 machine_mode rhs_mode
;
3505 unsigned short fltsz
;
3507 /* Is STMT a vectorizable conversion? */
3509 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3512 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3515 if (!is_gimple_assign (stmt
))
3518 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3521 code
= gimple_assign_rhs_code (stmt
);
3522 if (!CONVERT_EXPR_CODE_P (code
)
3523 && code
!= FIX_TRUNC_EXPR
3524 && code
!= FLOAT_EXPR
3525 && code
!= WIDEN_MULT_EXPR
3526 && code
!= WIDEN_LSHIFT_EXPR
)
3529 op_type
= TREE_CODE_LENGTH (code
);
3531 /* Check types of lhs and rhs. */
3532 scalar_dest
= gimple_assign_lhs (stmt
);
3533 lhs_type
= TREE_TYPE (scalar_dest
);
3534 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3536 op0
= gimple_assign_rhs1 (stmt
);
3537 rhs_type
= TREE_TYPE (op0
);
3539 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3540 && !((INTEGRAL_TYPE_P (lhs_type
)
3541 && INTEGRAL_TYPE_P (rhs_type
))
3542 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3543 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3546 if ((INTEGRAL_TYPE_P (lhs_type
)
3547 && (TYPE_PRECISION (lhs_type
)
3548 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3549 || (INTEGRAL_TYPE_P (rhs_type
)
3550 && (TYPE_PRECISION (rhs_type
)
3551 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
3553 if (dump_enabled_p ())
3554 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3555 "type conversion to/from bit-precision unsupported."
3560 /* Check the operands of the operation. */
3561 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3562 &def_stmt
, &def
, &dt
[0], &vectype_in
))
3564 if (dump_enabled_p ())
3565 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3566 "use not simple.\n");
3569 if (op_type
== binary_op
)
3573 op1
= gimple_assign_rhs2 (stmt
);
3574 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3575 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3577 if (CONSTANT_CLASS_P (op0
))
3578 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
3579 &def_stmt
, &def
, &dt
[1], &vectype_in
);
3581 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3586 if (dump_enabled_p ())
3587 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3588 "use not simple.\n");
3593 /* If op0 is an external or constant defs use a vector type of
3594 the same size as the output vector type. */
3596 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3598 gcc_assert (vectype_in
);
3601 if (dump_enabled_p ())
3603 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3604 "no vectype for scalar type ");
3605 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3606 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3612 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3613 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3614 if (nunits_in
< nunits_out
)
3616 else if (nunits_out
== nunits_in
)
3621 /* Multiple types in SLP are handled by creating the appropriate number of
3622 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3624 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3626 else if (modifier
== NARROW
)
3627 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3629 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3631 /* Sanity check: make sure that at least one copy of the vectorized stmt
3632 needs to be generated. */
3633 gcc_assert (ncopies
>= 1);
3635 /* Supportable by target? */
3639 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3641 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3646 if (dump_enabled_p ())
3647 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3648 "conversion not supported by target.\n");
3652 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3653 &code1
, &code2
, &multi_step_cvt
,
3656 /* Binary widening operation can only be supported directly by the
3658 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3662 if (code
!= FLOAT_EXPR
3663 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3664 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3667 rhs_mode
= TYPE_MODE (rhs_type
);
3668 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3669 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3670 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3671 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3674 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3675 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3676 if (cvt_type
== NULL_TREE
)
3679 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3681 if (!supportable_convert_operation (code
, vectype_out
,
3682 cvt_type
, &decl1
, &codecvt1
))
3685 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3686 cvt_type
, &codecvt1
,
3687 &codecvt2
, &multi_step_cvt
,
3691 gcc_assert (multi_step_cvt
== 0);
3693 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3694 vectype_in
, &code1
, &code2
,
3695 &multi_step_cvt
, &interm_types
))
3699 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3702 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3703 codecvt2
= ERROR_MARK
;
3707 interm_types
.safe_push (cvt_type
);
3708 cvt_type
= NULL_TREE
;
3713 gcc_assert (op_type
== unary_op
);
3714 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3715 &code1
, &multi_step_cvt
,
3719 if (code
!= FIX_TRUNC_EXPR
3720 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3721 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3724 rhs_mode
= TYPE_MODE (rhs_type
);
3726 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3727 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3728 if (cvt_type
== NULL_TREE
)
3730 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3733 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3734 &code1
, &multi_step_cvt
,
3743 if (!vec_stmt
) /* transformation not required. */
3745 if (dump_enabled_p ())
3746 dump_printf_loc (MSG_NOTE
, vect_location
,
3747 "=== vectorizable_conversion ===\n");
3748 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3750 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3751 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3753 else if (modifier
== NARROW
)
3755 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3756 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3760 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3761 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3763 interm_types
.release ();
3768 if (dump_enabled_p ())
3769 dump_printf_loc (MSG_NOTE
, vect_location
,
3770 "transform conversion. ncopies = %d.\n", ncopies
);
3772 if (op_type
== binary_op
)
3774 if (CONSTANT_CLASS_P (op0
))
3775 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3776 else if (CONSTANT_CLASS_P (op1
))
3777 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3780 /* In case of multi-step conversion, we first generate conversion operations
3781 to the intermediate types, and then from that types to the final one.
3782 We create vector destinations for the intermediate type (TYPES) received
3783 from supportable_*_operation, and store them in the correct order
3784 for future use in vect_create_vectorized_*_stmts (). */
3785 vec_dsts
.create (multi_step_cvt
+ 1);
3786 vec_dest
= vect_create_destination_var (scalar_dest
,
3787 (cvt_type
&& modifier
== WIDEN
)
3788 ? cvt_type
: vectype_out
);
3789 vec_dsts
.quick_push (vec_dest
);
3793 for (i
= interm_types
.length () - 1;
3794 interm_types
.iterate (i
, &intermediate_type
); i
--)
3796 vec_dest
= vect_create_destination_var (scalar_dest
,
3798 vec_dsts
.quick_push (vec_dest
);
3803 vec_dest
= vect_create_destination_var (scalar_dest
,
3805 ? vectype_out
: cvt_type
);
3809 if (modifier
== WIDEN
)
3811 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3812 if (op_type
== binary_op
)
3813 vec_oprnds1
.create (1);
3815 else if (modifier
== NARROW
)
3816 vec_oprnds0
.create (
3817 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3819 else if (code
== WIDEN_LSHIFT_EXPR
)
3820 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3823 prev_stmt_info
= NULL
;
3827 for (j
= 0; j
< ncopies
; j
++)
3830 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3833 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3835 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3837 /* Arguments are ready, create the new vector stmt. */
3838 if (code1
== CALL_EXPR
)
3840 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3841 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3842 gimple_call_set_lhs (new_stmt
, new_temp
);
3846 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3847 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
3848 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3849 gimple_assign_set_lhs (new_stmt
, new_temp
);
3852 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3854 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3858 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3860 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3861 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3866 /* In case the vectorization factor (VF) is bigger than the number
3867 of elements that we can fit in a vectype (nunits), we have to
3868 generate more than one vector stmt - i.e - we need to "unroll"
3869 the vector stmt by a factor VF/nunits. */
3870 for (j
= 0; j
< ncopies
; j
++)
3877 if (code
== WIDEN_LSHIFT_EXPR
)
3882 /* Store vec_oprnd1 for every vector stmt to be created
3883 for SLP_NODE. We check during the analysis that all
3884 the shift arguments are the same. */
3885 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3886 vec_oprnds1
.quick_push (vec_oprnd1
);
3888 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3892 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
3893 &vec_oprnds1
, slp_node
, -1);
3897 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3898 vec_oprnds0
.quick_push (vec_oprnd0
);
3899 if (op_type
== binary_op
)
3901 if (code
== WIDEN_LSHIFT_EXPR
)
3904 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
3906 vec_oprnds1
.quick_push (vec_oprnd1
);
3912 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3913 vec_oprnds0
.truncate (0);
3914 vec_oprnds0
.quick_push (vec_oprnd0
);
3915 if (op_type
== binary_op
)
3917 if (code
== WIDEN_LSHIFT_EXPR
)
3920 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
3922 vec_oprnds1
.truncate (0);
3923 vec_oprnds1
.quick_push (vec_oprnd1
);
3927 /* Arguments are ready. Create the new vector stmts. */
3928 for (i
= multi_step_cvt
; i
>= 0; i
--)
3930 tree this_dest
= vec_dsts
[i
];
3931 enum tree_code c1
= code1
, c2
= code2
;
3932 if (i
== 0 && codecvt2
!= ERROR_MARK
)
3937 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
3939 stmt
, this_dest
, gsi
,
3940 c1
, c2
, decl1
, decl2
,
3944 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3948 if (codecvt1
== CALL_EXPR
)
3950 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3951 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3952 gimple_call_set_lhs (new_stmt
, new_temp
);
3956 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3957 new_temp
= make_ssa_name (vec_dest
);
3958 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
3962 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3965 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
3968 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3970 if (!prev_stmt_info
)
3971 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3973 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3974 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3978 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3982 /* In case the vectorization factor (VF) is bigger than the number
3983 of elements that we can fit in a vectype (nunits), we have to
3984 generate more than one vector stmt - i.e - we need to "unroll"
3985 the vector stmt by a factor VF/nunits. */
3986 for (j
= 0; j
< ncopies
; j
++)
3990 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3994 vec_oprnds0
.truncate (0);
3995 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
3996 vect_pow2 (multi_step_cvt
) - 1);
3999 /* Arguments are ready. Create the new vector stmts. */
4001 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4003 if (codecvt1
== CALL_EXPR
)
4005 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4006 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4007 gimple_call_set_lhs (new_stmt
, new_temp
);
4011 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4012 new_temp
= make_ssa_name (vec_dest
);
4013 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4017 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4018 vec_oprnds0
[i
] = new_temp
;
4021 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4022 stmt
, vec_dsts
, gsi
,
4027 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4031 vec_oprnds0
.release ();
4032 vec_oprnds1
.release ();
4033 vec_dsts
.release ();
4034 interm_types
.release ();
4040 /* Function vectorizable_assignment.
4042 Check if STMT performs an assignment (copy) that can be vectorized.
4043 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4044 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4045 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4048 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
4049 gimple
*vec_stmt
, slp_tree slp_node
)
4054 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4055 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4056 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4060 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4061 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4064 vec
<tree
> vec_oprnds
= vNULL
;
4066 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4067 gimple new_stmt
= NULL
;
4068 stmt_vec_info prev_stmt_info
= NULL
;
4069 enum tree_code code
;
4072 /* Multiple types in SLP are handled by creating the appropriate number of
4073 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4075 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4078 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4080 gcc_assert (ncopies
>= 1);
4082 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4085 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4088 /* Is vectorizable assignment? */
4089 if (!is_gimple_assign (stmt
))
4092 scalar_dest
= gimple_assign_lhs (stmt
);
4093 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4096 code
= gimple_assign_rhs_code (stmt
);
4097 if (gimple_assign_single_p (stmt
)
4098 || code
== PAREN_EXPR
4099 || CONVERT_EXPR_CODE_P (code
))
4100 op
= gimple_assign_rhs1 (stmt
);
4104 if (code
== VIEW_CONVERT_EXPR
)
4105 op
= TREE_OPERAND (op
, 0);
4107 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
4108 &def_stmt
, &def
, &dt
[0], &vectype_in
))
4110 if (dump_enabled_p ())
4111 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4112 "use not simple.\n");
4116 /* We can handle NOP_EXPR conversions that do not change the number
4117 of elements or the vector size. */
4118 if ((CONVERT_EXPR_CODE_P (code
)
4119 || code
== VIEW_CONVERT_EXPR
)
4121 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4122 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4123 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4126 /* We do not handle bit-precision changes. */
4127 if ((CONVERT_EXPR_CODE_P (code
)
4128 || code
== VIEW_CONVERT_EXPR
)
4129 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4130 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4131 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4132 || ((TYPE_PRECISION (TREE_TYPE (op
))
4133 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4134 /* But a conversion that does not change the bit-pattern is ok. */
4135 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4136 > TYPE_PRECISION (TREE_TYPE (op
)))
4137 && TYPE_UNSIGNED (TREE_TYPE (op
))))
4139 if (dump_enabled_p ())
4140 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4141 "type conversion to/from bit-precision "
4146 if (!vec_stmt
) /* transformation not required. */
4148 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4149 if (dump_enabled_p ())
4150 dump_printf_loc (MSG_NOTE
, vect_location
,
4151 "=== vectorizable_assignment ===\n");
4152 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4157 if (dump_enabled_p ())
4158 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4161 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4164 for (j
= 0; j
< ncopies
; j
++)
4168 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4170 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4172 /* Arguments are ready. create the new vector stmt. */
4173 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4175 if (CONVERT_EXPR_CODE_P (code
)
4176 || code
== VIEW_CONVERT_EXPR
)
4177 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4178 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4179 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4180 gimple_assign_set_lhs (new_stmt
, new_temp
);
4181 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4183 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4190 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4192 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4194 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4197 vec_oprnds
.release ();
4202 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4203 either as shift by a scalar or by a vector. */
4206 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4209 machine_mode vec_mode
;
4214 vectype
= get_vectype_for_scalar_type (scalar_type
);
4218 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4220 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4222 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4224 || (optab_handler (optab
, TYPE_MODE (vectype
))
4225 == CODE_FOR_nothing
))
4229 vec_mode
= TYPE_MODE (vectype
);
4230 icode
= (int) optab_handler (optab
, vec_mode
);
4231 if (icode
== CODE_FOR_nothing
)
4238 /* Function vectorizable_shift.
4240 Check if STMT performs a shift operation that can be vectorized.
4241 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4242 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4243 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4246 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
4247 gimple
*vec_stmt
, slp_tree slp_node
)
4251 tree op0
, op1
= NULL
;
4252 tree vec_oprnd1
= NULL_TREE
;
4253 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4255 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4256 enum tree_code code
;
4257 machine_mode vec_mode
;
4261 machine_mode optab_op2_mode
;
4264 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4265 gimple new_stmt
= NULL
;
4266 stmt_vec_info prev_stmt_info
;
4273 vec
<tree
> vec_oprnds0
= vNULL
;
4274 vec
<tree
> vec_oprnds1
= vNULL
;
4277 bool scalar_shift_arg
= true;
4278 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4281 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4284 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4287 /* Is STMT a vectorizable binary/unary operation? */
4288 if (!is_gimple_assign (stmt
))
4291 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4294 code
= gimple_assign_rhs_code (stmt
);
4296 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4297 || code
== RROTATE_EXPR
))
4300 scalar_dest
= gimple_assign_lhs (stmt
);
4301 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4302 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4303 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4305 if (dump_enabled_p ())
4306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4307 "bit-precision shifts not supported.\n");
4311 op0
= gimple_assign_rhs1 (stmt
);
4312 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4313 &def_stmt
, &def
, &dt
[0], &vectype
))
4315 if (dump_enabled_p ())
4316 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4317 "use not simple.\n");
4320 /* If op0 is an external or constant def use a vector type with
4321 the same size as the output vector type. */
4323 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4325 gcc_assert (vectype
);
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4330 "no vectype for scalar type\n");
4334 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4335 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4336 if (nunits_out
!= nunits_in
)
4339 op1
= gimple_assign_rhs2 (stmt
);
4340 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4341 &def
, &dt
[1], &op1_vectype
))
4343 if (dump_enabled_p ())
4344 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4345 "use not simple.\n");
4350 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4354 /* Multiple types in SLP are handled by creating the appropriate number of
4355 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4357 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4360 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4362 gcc_assert (ncopies
>= 1);
4364 /* Determine whether the shift amount is a vector, or scalar. If the
4365 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4367 if (dt
[1] == vect_internal_def
&& !slp_node
)
4368 scalar_shift_arg
= false;
4369 else if (dt
[1] == vect_constant_def
4370 || dt
[1] == vect_external_def
4371 || dt
[1] == vect_internal_def
)
4373 /* In SLP, need to check whether the shift count is the same,
4374 in loops if it is a constant or invariant, it is always
4378 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4381 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4382 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4383 scalar_shift_arg
= false;
4388 if (dump_enabled_p ())
4389 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4390 "operand mode requires invariant argument.\n");
4394 /* Vector shifted by vector. */
4395 if (!scalar_shift_arg
)
4397 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4398 if (dump_enabled_p ())
4399 dump_printf_loc (MSG_NOTE
, vect_location
,
4400 "vector/vector shift/rotate found.\n");
4403 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4404 if (op1_vectype
== NULL_TREE
4405 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4407 if (dump_enabled_p ())
4408 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4409 "unusable type for last operand in"
4410 " vector/vector shift/rotate.\n");
4414 /* See if the machine has a vector shifted by scalar insn and if not
4415 then see if it has a vector shifted by vector insn. */
4418 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4420 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4422 if (dump_enabled_p ())
4423 dump_printf_loc (MSG_NOTE
, vect_location
,
4424 "vector/scalar shift/rotate found.\n");
4428 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4430 && (optab_handler (optab
, TYPE_MODE (vectype
))
4431 != CODE_FOR_nothing
))
4433 scalar_shift_arg
= false;
4435 if (dump_enabled_p ())
4436 dump_printf_loc (MSG_NOTE
, vect_location
,
4437 "vector/vector shift/rotate found.\n");
4439 /* Unlike the other binary operators, shifts/rotates have
4440 the rhs being int, instead of the same type as the lhs,
4441 so make sure the scalar is the right type if we are
4442 dealing with vectors of long long/long/short/char. */
4443 if (dt
[1] == vect_constant_def
)
4444 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4445 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4449 && TYPE_MODE (TREE_TYPE (vectype
))
4450 != TYPE_MODE (TREE_TYPE (op1
)))
4452 if (dump_enabled_p ())
4453 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4454 "unusable type for last operand in"
4455 " vector/vector shift/rotate.\n");
4458 if (vec_stmt
&& !slp_node
)
4460 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4461 op1
= vect_init_vector (stmt
, op1
,
4462 TREE_TYPE (vectype
), NULL
);
4469 /* Supportable by target? */
4472 if (dump_enabled_p ())
4473 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4477 vec_mode
= TYPE_MODE (vectype
);
4478 icode
= (int) optab_handler (optab
, vec_mode
);
4479 if (icode
== CODE_FOR_nothing
)
4481 if (dump_enabled_p ())
4482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4483 "op not supported by target.\n");
4484 /* Check only during analysis. */
4485 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4486 || (vf
< vect_min_worthwhile_factor (code
)
4489 if (dump_enabled_p ())
4490 dump_printf_loc (MSG_NOTE
, vect_location
,
4491 "proceeding using word mode.\n");
4494 /* Worthwhile without SIMD support? Check only during analysis. */
4495 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4496 && vf
< vect_min_worthwhile_factor (code
)
4499 if (dump_enabled_p ())
4500 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4501 "not worthwhile without SIMD support.\n");
4505 if (!vec_stmt
) /* transformation not required. */
4507 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4508 if (dump_enabled_p ())
4509 dump_printf_loc (MSG_NOTE
, vect_location
,
4510 "=== vectorizable_shift ===\n");
4511 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4517 if (dump_enabled_p ())
4518 dump_printf_loc (MSG_NOTE
, vect_location
,
4519 "transform binary/unary operation.\n");
4522 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4524 prev_stmt_info
= NULL
;
4525 for (j
= 0; j
< ncopies
; j
++)
4530 if (scalar_shift_arg
)
4532 /* Vector shl and shr insn patterns can be defined with scalar
4533 operand 2 (shift operand). In this case, use constant or loop
4534 invariant op1 directly, without extending it to vector mode
4536 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4537 if (!VECTOR_MODE_P (optab_op2_mode
))
4539 if (dump_enabled_p ())
4540 dump_printf_loc (MSG_NOTE
, vect_location
,
4541 "operand 1 using scalar mode.\n");
4543 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4544 vec_oprnds1
.quick_push (vec_oprnd1
);
4547 /* Store vec_oprnd1 for every vector stmt to be created
4548 for SLP_NODE. We check during the analysis that all
4549 the shift arguments are the same.
4550 TODO: Allow different constants for different vector
4551 stmts generated for an SLP instance. */
4552 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4553 vec_oprnds1
.quick_push (vec_oprnd1
);
4558 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4559 (a special case for certain kind of vector shifts); otherwise,
4560 operand 1 should be of a vector type (the usual case). */
4562 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4565 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4569 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4571 /* Arguments are ready. Create the new vector stmt. */
4572 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4574 vop1
= vec_oprnds1
[i
];
4575 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4576 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4577 gimple_assign_set_lhs (new_stmt
, new_temp
);
4578 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4580 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4587 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4589 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4590 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4593 vec_oprnds0
.release ();
4594 vec_oprnds1
.release ();
4600 /* Function vectorizable_operation.
4602 Check if STMT performs a binary, unary or ternary operation that can
4604 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4605 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4606 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4609 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
4610 gimple
*vec_stmt
, slp_tree slp_node
)
4614 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4615 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4617 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4618 enum tree_code code
;
4619 machine_mode vec_mode
;
4626 enum vect_def_type dt
[3]
4627 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4628 gimple new_stmt
= NULL
;
4629 stmt_vec_info prev_stmt_info
;
4635 vec
<tree
> vec_oprnds0
= vNULL
;
4636 vec
<tree
> vec_oprnds1
= vNULL
;
4637 vec
<tree
> vec_oprnds2
= vNULL
;
4638 tree vop0
, vop1
, vop2
;
4639 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4642 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4645 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4648 /* Is STMT a vectorizable binary/unary operation? */
4649 if (!is_gimple_assign (stmt
))
4652 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4655 code
= gimple_assign_rhs_code (stmt
);
4657 /* For pointer addition, we should use the normal plus for
4658 the vector addition. */
4659 if (code
== POINTER_PLUS_EXPR
)
4662 /* Support only unary or binary operations. */
4663 op_type
= TREE_CODE_LENGTH (code
);
4664 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4666 if (dump_enabled_p ())
4667 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4668 "num. args = %d (not unary/binary/ternary op).\n",
4673 scalar_dest
= gimple_assign_lhs (stmt
);
4674 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4676 /* Most operations cannot handle bit-precision types without extra
4678 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4679 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4680 /* Exception are bitwise binary operations. */
4681 && code
!= BIT_IOR_EXPR
4682 && code
!= BIT_XOR_EXPR
4683 && code
!= BIT_AND_EXPR
)
4685 if (dump_enabled_p ())
4686 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4687 "bit-precision arithmetic not supported.\n");
4691 op0
= gimple_assign_rhs1 (stmt
);
4692 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4693 &def_stmt
, &def
, &dt
[0], &vectype
))
4695 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4697 "use not simple.\n");
4700 /* If op0 is an external or constant def use a vector type with
4701 the same size as the output vector type. */
4703 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4705 gcc_assert (vectype
);
4708 if (dump_enabled_p ())
4710 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4711 "no vectype for scalar type ");
4712 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4714 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4720 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4721 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4722 if (nunits_out
!= nunits_in
)
4725 if (op_type
== binary_op
|| op_type
== ternary_op
)
4727 op1
= gimple_assign_rhs2 (stmt
);
4728 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4731 if (dump_enabled_p ())
4732 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4733 "use not simple.\n");
4737 if (op_type
== ternary_op
)
4739 op2
= gimple_assign_rhs3 (stmt
);
4740 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4743 if (dump_enabled_p ())
4744 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4745 "use not simple.\n");
4751 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4755 /* Multiple types in SLP are handled by creating the appropriate number of
4756 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4758 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4761 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4763 gcc_assert (ncopies
>= 1);
4765 /* Shifts are handled in vectorizable_shift (). */
4766 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4767 || code
== RROTATE_EXPR
)
4770 /* Supportable by target? */
4772 vec_mode
= TYPE_MODE (vectype
);
4773 if (code
== MULT_HIGHPART_EXPR
)
4775 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
4776 icode
= LAST_INSN_CODE
;
4778 icode
= CODE_FOR_nothing
;
4782 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4785 if (dump_enabled_p ())
4786 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4790 icode
= (int) optab_handler (optab
, vec_mode
);
4793 if (icode
== CODE_FOR_nothing
)
4795 if (dump_enabled_p ())
4796 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4797 "op not supported by target.\n");
4798 /* Check only during analysis. */
4799 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4800 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4802 if (dump_enabled_p ())
4803 dump_printf_loc (MSG_NOTE
, vect_location
,
4804 "proceeding using word mode.\n");
4807 /* Worthwhile without SIMD support? Check only during analysis. */
4808 if (!VECTOR_MODE_P (vec_mode
)
4810 && vf
< vect_min_worthwhile_factor (code
))
4812 if (dump_enabled_p ())
4813 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4814 "not worthwhile without SIMD support.\n");
4818 if (!vec_stmt
) /* transformation not required. */
4820 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4821 if (dump_enabled_p ())
4822 dump_printf_loc (MSG_NOTE
, vect_location
,
4823 "=== vectorizable_operation ===\n");
4824 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4830 if (dump_enabled_p ())
4831 dump_printf_loc (MSG_NOTE
, vect_location
,
4832 "transform binary/unary operation.\n");
4835 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4837 /* In case the vectorization factor (VF) is bigger than the number
4838 of elements that we can fit in a vectype (nunits), we have to generate
4839 more than one vector stmt - i.e - we need to "unroll" the
4840 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4841 from one copy of the vector stmt to the next, in the field
4842 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4843 stages to find the correct vector defs to be used when vectorizing
4844 stmts that use the defs of the current stmt. The example below
4845 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4846 we need to create 4 vectorized stmts):
4848 before vectorization:
4849 RELATED_STMT VEC_STMT
4853 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4855 RELATED_STMT VEC_STMT
4856 VS1_0: vx0 = memref0 VS1_1 -
4857 VS1_1: vx1 = memref1 VS1_2 -
4858 VS1_2: vx2 = memref2 VS1_3 -
4859 VS1_3: vx3 = memref3 - -
4860 S1: x = load - VS1_0
4863 step2: vectorize stmt S2 (done here):
4864 To vectorize stmt S2 we first need to find the relevant vector
4865 def for the first operand 'x'. This is, as usual, obtained from
4866 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4867 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4868 relevant vector def 'vx0'. Having found 'vx0' we can generate
4869 the vector stmt VS2_0, and as usual, record it in the
4870 STMT_VINFO_VEC_STMT of stmt S2.
4871 When creating the second copy (VS2_1), we obtain the relevant vector
4872 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4873 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4874 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4875 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4876 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4877 chain of stmts and pointers:
4878 RELATED_STMT VEC_STMT
4879 VS1_0: vx0 = memref0 VS1_1 -
4880 VS1_1: vx1 = memref1 VS1_2 -
4881 VS1_2: vx2 = memref2 VS1_3 -
4882 VS1_3: vx3 = memref3 - -
4883 S1: x = load - VS1_0
4884 VS2_0: vz0 = vx0 + v1 VS2_1 -
4885 VS2_1: vz1 = vx1 + v1 VS2_2 -
4886 VS2_2: vz2 = vx2 + v1 VS2_3 -
4887 VS2_3: vz3 = vx3 + v1 - -
4888 S2: z = x + 1 - VS2_0 */
4890 prev_stmt_info
= NULL
;
4891 for (j
= 0; j
< ncopies
; j
++)
4896 if (op_type
== binary_op
|| op_type
== ternary_op
)
4897 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4900 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4902 if (op_type
== ternary_op
)
4904 vec_oprnds2
.create (1);
4905 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
4912 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4913 if (op_type
== ternary_op
)
4915 tree vec_oprnd
= vec_oprnds2
.pop ();
4916 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
4921 /* Arguments are ready. Create the new vector stmt. */
4922 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4924 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
4925 ? vec_oprnds1
[i
] : NULL_TREE
);
4926 vop2
= ((op_type
== ternary_op
)
4927 ? vec_oprnds2
[i
] : NULL_TREE
);
4928 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
4929 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4930 gimple_assign_set_lhs (new_stmt
, new_temp
);
4931 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4933 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4940 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4942 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4943 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4946 vec_oprnds0
.release ();
4947 vec_oprnds1
.release ();
4948 vec_oprnds2
.release ();
4953 /* A helper function to ensure data reference DR's base alignment
4957 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
4962 if (((dataref_aux
*)dr
->aux
)->base_misaligned
)
4964 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4965 tree base_decl
= ((dataref_aux
*)dr
->aux
)->base_decl
;
4967 if (decl_in_symtab_p (base_decl
))
4968 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
4971 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
4972 DECL_USER_ALIGN (base_decl
) = 1;
4974 ((dataref_aux
*)dr
->aux
)->base_misaligned
= false;
4979 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4980 reversal of the vector elements. If that is impossible to do,
4984 perm_mask_for_reverse (tree vectype
)
4989 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4990 sel
= XALLOCAVEC (unsigned char, nunits
);
4992 for (i
= 0; i
< nunits
; ++i
)
4993 sel
[i
] = nunits
- 1 - i
;
4995 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
4997 return vect_gen_perm_mask_checked (vectype
, sel
);
5000 /* Function vectorizable_store.
5002 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5004 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5005 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5006 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5009 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5015 tree vec_oprnd
= NULL_TREE
;
5016 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5017 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5018 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5020 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5021 struct loop
*loop
= NULL
;
5022 machine_mode vec_mode
;
5024 enum dr_alignment_support alignment_support_scheme
;
5027 enum vect_def_type dt
;
5028 stmt_vec_info prev_stmt_info
= NULL
;
5029 tree dataref_ptr
= NULL_TREE
;
5030 tree dataref_offset
= NULL_TREE
;
5031 gimple ptr_incr
= NULL
;
5032 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5035 gimple next_stmt
, first_stmt
= NULL
;
5036 bool grouped_store
= false;
5037 bool store_lanes_p
= false;
5038 unsigned int group_size
, i
;
5039 vec
<tree
> dr_chain
= vNULL
;
5040 vec
<tree
> oprnds
= vNULL
;
5041 vec
<tree
> result_chain
= vNULL
;
5043 bool negative
= false;
5044 tree offset
= NULL_TREE
;
5045 vec
<tree
> vec_oprnds
= vNULL
;
5046 bool slp
= (slp_node
!= NULL
);
5047 unsigned int vec_num
;
5048 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5052 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5054 /* Multiple types in SLP are handled by creating the appropriate number of
5055 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5057 if (slp
|| PURE_SLP_STMT (stmt_info
))
5060 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5062 gcc_assert (ncopies
>= 1);
5064 /* FORNOW. This restriction should be relaxed. */
5065 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5069 "multiple types in nested loop.\n");
5073 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5076 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5079 /* Is vectorizable store? */
5081 if (!is_gimple_assign (stmt
))
5084 scalar_dest
= gimple_assign_lhs (stmt
);
5085 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5086 && is_pattern_stmt_p (stmt_info
))
5087 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5088 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5089 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5090 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5091 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5092 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5093 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5094 && TREE_CODE (scalar_dest
) != MEM_REF
)
5097 gcc_assert (gimple_assign_single_p (stmt
));
5098 op
= gimple_assign_rhs1 (stmt
);
5099 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5102 if (dump_enabled_p ())
5103 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5104 "use not simple.\n");
5108 elem_type
= TREE_TYPE (vectype
);
5109 vec_mode
= TYPE_MODE (vectype
);
5111 /* FORNOW. In some cases can vectorize even if data-type not supported
5112 (e.g. - array initialization with 0). */
5113 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5116 if (!STMT_VINFO_DATA_REF (stmt_info
))
5119 if (!STMT_VINFO_STRIDED_P (stmt_info
))
5122 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5123 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5124 size_zero_node
) < 0;
5125 if (negative
&& ncopies
> 1)
5127 if (dump_enabled_p ())
5128 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5129 "multiple types with negative step.\n");
5134 gcc_assert (!grouped_store
);
5135 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5136 if (alignment_support_scheme
!= dr_aligned
5137 && alignment_support_scheme
!= dr_unaligned_supported
)
5139 if (dump_enabled_p ())
5140 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5141 "negative step but alignment required.\n");
5144 if (dt
!= vect_constant_def
5145 && dt
!= vect_external_def
5146 && !perm_mask_for_reverse (vectype
))
5148 if (dump_enabled_p ())
5149 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5150 "negative step and reversing not supported.\n");
5156 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5158 grouped_store
= true;
5159 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5160 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5162 && !PURE_SLP_STMT (stmt_info
)
5163 && !STMT_VINFO_STRIDED_P (stmt_info
))
5165 if (vect_store_lanes_supported (vectype
, group_size
))
5166 store_lanes_p
= true;
5167 else if (!vect_grouped_store_supported (vectype
, group_size
))
5171 if (STMT_VINFO_STRIDED_P (stmt_info
)
5172 && (slp
|| PURE_SLP_STMT (stmt_info
))
5173 && (group_size
> nunits
5174 || nunits
% group_size
!= 0))
5176 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5177 "unhandled strided group store\n");
5181 if (first_stmt
== stmt
)
5183 /* STMT is the leader of the group. Check the operands of all the
5184 stmts of the group. */
5185 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5188 gcc_assert (gimple_assign_single_p (next_stmt
));
5189 op
= gimple_assign_rhs1 (next_stmt
);
5190 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
5191 &def_stmt
, &def
, &dt
))
5193 if (dump_enabled_p ())
5194 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5195 "use not simple.\n");
5198 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5203 if (!vec_stmt
) /* transformation not required. */
5205 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5206 /* The SLP costs are calculated during SLP analysis. */
5207 if (!PURE_SLP_STMT (stmt_info
))
5208 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5215 ensure_base_align (stmt_info
, dr
);
5219 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5220 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5222 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5225 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5227 /* We vectorize all the stmts of the interleaving group when we
5228 reach the last stmt in the group. */
5229 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5230 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5239 grouped_store
= false;
5240 /* VEC_NUM is the number of vect stmts to be created for this
5242 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5243 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5244 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5245 op
= gimple_assign_rhs1 (first_stmt
);
5248 /* VEC_NUM is the number of vect stmts to be created for this
5250 vec_num
= group_size
;
5256 group_size
= vec_num
= 1;
5259 if (dump_enabled_p ())
5260 dump_printf_loc (MSG_NOTE
, vect_location
,
5261 "transform store. ncopies = %d\n", ncopies
);
5263 if (STMT_VINFO_STRIDED_P (stmt_info
))
5265 gimple_stmt_iterator incr_gsi
;
5271 gimple_seq stmts
= NULL
;
5272 tree stride_base
, stride_step
, alias_off
;
5275 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5278 = fold_build_pointer_plus
5279 (unshare_expr (DR_BASE_ADDRESS (dr
)),
5280 size_binop (PLUS_EXPR
,
5281 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
5282 convert_to_ptrofftype (DR_INIT(dr
))));
5283 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
5285 /* For a store with loop-invariant (but other than power-of-2)
5286 stride (i.e. not a grouped access) like so:
5288 for (i = 0; i < n; i += stride)
5291 we generate a new induction variable and new stores from
5292 the components of the (vectorized) rhs:
5294 for (j = 0; ; j += VF*stride)
5299 array[j + stride] = tmp2;
5303 unsigned nstores
= nunits
;
5304 tree ltype
= elem_type
;
5307 nstores
= nunits
/ group_size
;
5308 if (group_size
< nunits
)
5309 ltype
= build_vector_type (elem_type
, group_size
);
5312 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
5313 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5316 ivstep
= stride_step
;
5317 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5318 build_int_cst (TREE_TYPE (ivstep
),
5319 ncopies
* nstores
));
5321 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5323 create_iv (stride_base
, ivstep
, NULL
,
5324 loop
, &incr_gsi
, insert_after
,
5326 incr
= gsi_stmt (incr_gsi
);
5327 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
5329 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5331 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5333 prev_stmt_info
= NULL
;
5334 running_off
= offvar
;
5335 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
5336 for (j
= 0; j
< ncopies
; j
++)
5338 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5339 and first_stmt == stmt. */
5341 vec_oprnd
= vect_get_vec_def_for_operand (op
, first_stmt
, NULL
);
5343 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
5345 for (i
= 0; i
< nstores
; i
++)
5347 tree newref
, newoff
;
5348 gimple incr
, assign
;
5349 tree size
= TYPE_SIZE (ltype
);
5350 /* Extract the i'th component. */
5351 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
, bitsize_int (i
),
5353 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
5356 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
5360 newref
= build2 (MEM_REF
, ltype
,
5361 running_off
, alias_off
);
5363 /* And store it to *running_off. */
5364 assign
= gimple_build_assign (newref
, elem
);
5365 vect_finish_stmt_generation (stmt
, assign
, gsi
);
5367 newoff
= copy_ssa_name (running_off
, NULL
);
5368 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5369 running_off
, stride_step
);
5370 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5372 running_off
= newoff
;
5373 if (j
== 0 && i
== i
)
5374 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= assign
;
5376 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
5377 prev_stmt_info
= vinfo_for_stmt (assign
);
5383 dr_chain
.create (group_size
);
5384 oprnds
.create (group_size
);
5386 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5387 gcc_assert (alignment_support_scheme
);
5388 /* Targets with store-lane instructions must not require explicit
5390 gcc_assert (!store_lanes_p
5391 || alignment_support_scheme
== dr_aligned
5392 || alignment_support_scheme
== dr_unaligned_supported
);
5395 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5398 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5400 aggr_type
= vectype
;
5402 /* In case the vectorization factor (VF) is bigger than the number
5403 of elements that we can fit in a vectype (nunits), we have to generate
5404 more than one vector stmt - i.e - we need to "unroll" the
5405 vector stmt by a factor VF/nunits. For more details see documentation in
5406 vect_get_vec_def_for_copy_stmt. */
5408 /* In case of interleaving (non-unit grouped access):
5415 We create vectorized stores starting from base address (the access of the
5416 first stmt in the chain (S2 in the above example), when the last store stmt
5417 of the chain (S4) is reached:
5420 VS2: &base + vec_size*1 = vx0
5421 VS3: &base + vec_size*2 = vx1
5422 VS4: &base + vec_size*3 = vx3
5424 Then permutation statements are generated:
5426 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5427 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5430 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5431 (the order of the data-refs in the output of vect_permute_store_chain
5432 corresponds to the order of scalar stmts in the interleaving chain - see
5433 the documentation of vect_permute_store_chain()).
5435 In case of both multiple types and interleaving, above vector stores and
5436 permutation stmts are created for every copy. The result vector stmts are
5437 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5438 STMT_VINFO_RELATED_STMT for the next copies.
5441 prev_stmt_info
= NULL
;
5442 for (j
= 0; j
< ncopies
; j
++)
5450 /* Get vectorized arguments for SLP_NODE. */
5451 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5452 NULL
, slp_node
, -1);
5454 vec_oprnd
= vec_oprnds
[0];
5458 /* For interleaved stores we collect vectorized defs for all the
5459 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5460 used as an input to vect_permute_store_chain(), and OPRNDS as
5461 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5463 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5464 OPRNDS are of size 1. */
5465 next_stmt
= first_stmt
;
5466 for (i
= 0; i
< group_size
; i
++)
5468 /* Since gaps are not supported for interleaved stores,
5469 GROUP_SIZE is the exact number of stmts in the chain.
5470 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5471 there is no interleaving, GROUP_SIZE is 1, and only one
5472 iteration of the loop will be executed. */
5473 gcc_assert (next_stmt
5474 && gimple_assign_single_p (next_stmt
));
5475 op
= gimple_assign_rhs1 (next_stmt
);
5477 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5479 dr_chain
.quick_push (vec_oprnd
);
5480 oprnds
.quick_push (vec_oprnd
);
5481 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5485 /* We should have catched mismatched types earlier. */
5486 gcc_assert (useless_type_conversion_p (vectype
,
5487 TREE_TYPE (vec_oprnd
)));
5488 bool simd_lane_access_p
5489 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5490 if (simd_lane_access_p
5491 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5492 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5493 && integer_zerop (DR_OFFSET (first_dr
))
5494 && integer_zerop (DR_INIT (first_dr
))
5495 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5496 get_alias_set (DR_REF (first_dr
))))
5498 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5499 dataref_offset
= build_int_cst (reference_alias_ptr_type
5500 (DR_REF (first_dr
)), 0);
5505 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5506 simd_lane_access_p
? loop
: NULL
,
5507 offset
, &dummy
, gsi
, &ptr_incr
,
5508 simd_lane_access_p
, &inv_p
);
5509 gcc_assert (bb_vinfo
|| !inv_p
);
5513 /* For interleaved stores we created vectorized defs for all the
5514 defs stored in OPRNDS in the previous iteration (previous copy).
5515 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5516 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5518 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5519 OPRNDS are of size 1. */
5520 for (i
= 0; i
< group_size
; i
++)
5523 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5525 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5526 dr_chain
[i
] = vec_oprnd
;
5527 oprnds
[i
] = vec_oprnd
;
5531 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5532 TYPE_SIZE_UNIT (aggr_type
));
5534 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5535 TYPE_SIZE_UNIT (aggr_type
));
5542 /* Combine all the vectors into an array. */
5543 vec_array
= create_vector_array (vectype
, vec_num
);
5544 for (i
= 0; i
< vec_num
; i
++)
5546 vec_oprnd
= dr_chain
[i
];
5547 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5551 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5552 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5553 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5554 gimple_call_set_lhs (new_stmt
, data_ref
);
5555 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5563 result_chain
.create (group_size
);
5565 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5569 next_stmt
= first_stmt
;
5570 for (i
= 0; i
< vec_num
; i
++)
5572 unsigned align
, misalign
;
5575 /* Bump the vector pointer. */
5576 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5580 vec_oprnd
= vec_oprnds
[i
];
5581 else if (grouped_store
)
5582 /* For grouped stores vectorized defs are interleaved in
5583 vect_permute_store_chain(). */
5584 vec_oprnd
= result_chain
[i
];
5586 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
5589 : build_int_cst (reference_alias_ptr_type
5590 (DR_REF (first_dr
)), 0));
5591 align
= TYPE_ALIGN_UNIT (vectype
);
5592 if (aligned_access_p (first_dr
))
5594 else if (DR_MISALIGNMENT (first_dr
) == -1)
5596 TREE_TYPE (data_ref
)
5597 = build_aligned_type (TREE_TYPE (data_ref
),
5598 TYPE_ALIGN (elem_type
));
5599 align
= TYPE_ALIGN_UNIT (elem_type
);
5604 TREE_TYPE (data_ref
)
5605 = build_aligned_type (TREE_TYPE (data_ref
),
5606 TYPE_ALIGN (elem_type
));
5607 misalign
= DR_MISALIGNMENT (first_dr
);
5609 if (dataref_offset
== NULL_TREE
)
5610 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
5614 && dt
!= vect_constant_def
5615 && dt
!= vect_external_def
)
5617 tree perm_mask
= perm_mask_for_reverse (vectype
);
5619 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
5621 tree new_temp
= make_ssa_name (perm_dest
);
5623 /* Generate the permute statement. */
5625 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
5626 vec_oprnd
, perm_mask
);
5627 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5629 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5630 vec_oprnd
= new_temp
;
5633 /* Arguments are ready. Create the new vector stmt. */
5634 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5635 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5640 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5648 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5650 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5651 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5655 dr_chain
.release ();
5657 result_chain
.release ();
5658 vec_oprnds
.release ();
5663 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5664 VECTOR_CST mask. No checks are made that the target platform supports the
5665 mask, so callers may wish to test can_vec_perm_p separately, or use
5666 vect_gen_perm_mask_checked. */
5669 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
5671 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
5674 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5676 mask_elt_type
= lang_hooks
.types
.type_for_mode
5677 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
5678 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
5680 mask_elts
= XALLOCAVEC (tree
, nunits
);
5681 for (i
= nunits
- 1; i
>= 0; i
--)
5682 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
5683 mask_vec
= build_vector (mask_type
, mask_elts
);
5688 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5689 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5692 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
5694 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
5695 return vect_gen_perm_mask_any (vectype
, sel
);
5698 /* Given a vector variable X and Y, that was generated for the scalar
5699 STMT, generate instructions to permute the vector elements of X and Y
5700 using permutation mask MASK_VEC, insert them at *GSI and return the
5701 permuted vector variable. */
5704 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
5705 gimple_stmt_iterator
*gsi
)
5707 tree vectype
= TREE_TYPE (x
);
5708 tree perm_dest
, data_ref
;
5711 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
5712 data_ref
= make_ssa_name (perm_dest
);
5714 /* Generate the permute statement. */
5715 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
5716 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5721 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5722 inserting them on the loops preheader edge. Returns true if we
5723 were successful in doing so (and thus STMT can be moved then),
5724 otherwise returns false. */
5727 hoist_defs_of_uses (gimple stmt
, struct loop
*loop
)
5733 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5735 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5736 if (!gimple_nop_p (def_stmt
)
5737 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5739 /* Make sure we don't need to recurse. While we could do
5740 so in simple cases when there are more complex use webs
5741 we don't have an easy way to preserve stmt order to fulfil
5742 dependencies within them. */
5745 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
5747 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
5749 gimple def_stmt2
= SSA_NAME_DEF_STMT (op2
);
5750 if (!gimple_nop_p (def_stmt2
)
5751 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
5761 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5763 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5764 if (!gimple_nop_p (def_stmt
)
5765 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5767 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
5768 gsi_remove (&gsi
, false);
5769 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
5776 /* vectorizable_load.
5778 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5780 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5781 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5782 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5785 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5786 slp_tree slp_node
, slp_instance slp_node_instance
)
5789 tree vec_dest
= NULL
;
5790 tree data_ref
= NULL
;
5791 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5792 stmt_vec_info prev_stmt_info
;
5793 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5794 struct loop
*loop
= NULL
;
5795 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5796 bool nested_in_vect_loop
= false;
5797 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5798 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5802 gimple new_stmt
= NULL
;
5804 enum dr_alignment_support alignment_support_scheme
;
5805 tree dataref_ptr
= NULL_TREE
;
5806 tree dataref_offset
= NULL_TREE
;
5807 gimple ptr_incr
= NULL
;
5808 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5810 int i
, j
, group_size
= -1, group_gap
;
5811 tree msq
= NULL_TREE
, lsq
;
5812 tree offset
= NULL_TREE
;
5813 tree byte_offset
= NULL_TREE
;
5814 tree realignment_token
= NULL_TREE
;
5816 vec
<tree
> dr_chain
= vNULL
;
5817 bool grouped_load
= false;
5818 bool load_lanes_p
= false;
5821 bool negative
= false;
5822 bool compute_in_loop
= false;
5823 struct loop
*at_loop
;
5825 bool slp
= (slp_node
!= NULL
);
5826 bool slp_perm
= false;
5827 enum tree_code code
;
5828 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5831 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
5832 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
5833 int gather_scale
= 1;
5834 enum vect_def_type gather_dt
= vect_unknown_def_type
;
5838 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5839 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5840 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5845 /* Multiple types in SLP are handled by creating the appropriate number of
5846 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5848 if (slp
|| PURE_SLP_STMT (stmt_info
))
5851 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5853 gcc_assert (ncopies
>= 1);
5855 /* FORNOW. This restriction should be relaxed. */
5856 if (nested_in_vect_loop
&& ncopies
> 1)
5858 if (dump_enabled_p ())
5859 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5860 "multiple types in nested loop.\n");
5864 /* Invalidate assumptions made by dependence analysis when vectorization
5865 on the unrolled body effectively re-orders stmts. */
5867 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5868 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5869 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5871 if (dump_enabled_p ())
5872 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5873 "cannot perform implicit CSE when unrolling "
5874 "with negative dependence distance\n");
5878 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5881 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5884 /* Is vectorizable load? */
5885 if (!is_gimple_assign (stmt
))
5888 scalar_dest
= gimple_assign_lhs (stmt
);
5889 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5892 code
= gimple_assign_rhs_code (stmt
);
5893 if (code
!= ARRAY_REF
5894 && code
!= BIT_FIELD_REF
5895 && code
!= INDIRECT_REF
5896 && code
!= COMPONENT_REF
5897 && code
!= IMAGPART_EXPR
5898 && code
!= REALPART_EXPR
5900 && TREE_CODE_CLASS (code
) != tcc_declaration
)
5903 if (!STMT_VINFO_DATA_REF (stmt_info
))
5906 elem_type
= TREE_TYPE (vectype
);
5907 mode
= TYPE_MODE (vectype
);
5909 /* FORNOW. In some cases can vectorize even if data-type not supported
5910 (e.g. - data copies). */
5911 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
5913 if (dump_enabled_p ())
5914 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5915 "Aligned load, but unsupported type.\n");
5919 /* Check if the load is a part of an interleaving chain. */
5920 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5922 grouped_load
= true;
5924 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
5926 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5928 /* If this is single-element interleaving with an element distance
5929 that leaves unused vector loads around punt - we at least create
5930 very sub-optimal code in that case (and blow up memory,
5932 if (first_stmt
== stmt
5933 && !GROUP_NEXT_ELEMENT (stmt_info
)
5934 && GROUP_SIZE (stmt_info
) > TYPE_VECTOR_SUBPARTS (vectype
))
5936 if (dump_enabled_p ())
5937 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5938 "single-element interleaving not supported "
5939 "for not adjacent vector loads\n");
5943 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
5946 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5948 && !PURE_SLP_STMT (stmt_info
)
5949 && !STMT_VINFO_STRIDED_P (stmt_info
))
5951 if (vect_load_lanes_supported (vectype
, group_size
))
5952 load_lanes_p
= true;
5953 else if (!vect_grouped_load_supported (vectype
, group_size
))
5957 /* Invalidate assumptions made by dependence analysis when vectorization
5958 on the unrolled body effectively re-orders stmts. */
5959 if (!PURE_SLP_STMT (stmt_info
)
5960 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5961 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5962 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5964 if (dump_enabled_p ())
5965 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5966 "cannot perform implicit CSE when performing "
5967 "group loads with negative dependence distance\n");
5971 /* Similarly when the stmt is a load that is both part of a SLP
5972 instance and a loop vectorized stmt via the same-dr mechanism
5973 we have to give up. */
5974 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
5975 && (STMT_SLP_TYPE (stmt_info
)
5976 != STMT_SLP_TYPE (vinfo_for_stmt
5977 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
5979 if (dump_enabled_p ())
5980 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5981 "conflicting SLP types for CSEd load\n");
5987 if (STMT_VINFO_GATHER_P (stmt_info
))
5991 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
5992 &gather_off
, &gather_scale
);
5993 gcc_assert (gather_decl
);
5994 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
5995 &def_stmt
, &def
, &gather_dt
,
5996 &gather_off_vectype
))
5998 if (dump_enabled_p ())
5999 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6000 "gather index use not simple.\n");
6004 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6007 && (slp
|| PURE_SLP_STMT (stmt_info
)))
6008 && (group_size
> nunits
6009 || nunits
% group_size
!= 0
6010 /* We don't support load permutations. */
6013 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6014 "unhandled strided group load\n");
6020 negative
= tree_int_cst_compare (nested_in_vect_loop
6021 ? STMT_VINFO_DR_STEP (stmt_info
)
6023 size_zero_node
) < 0;
6024 if (negative
&& ncopies
> 1)
6026 if (dump_enabled_p ())
6027 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6028 "multiple types with negative step.\n");
6036 if (dump_enabled_p ())
6037 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6038 "negative step for group load not supported"
6042 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
6043 if (alignment_support_scheme
!= dr_aligned
6044 && alignment_support_scheme
!= dr_unaligned_supported
)
6046 if (dump_enabled_p ())
6047 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6048 "negative step but alignment required.\n");
6051 if (!perm_mask_for_reverse (vectype
))
6053 if (dump_enabled_p ())
6054 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6055 "negative step and reversing not supported."
6062 if (!vec_stmt
) /* transformation not required. */
6064 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6065 /* The SLP costs are calculated during SLP analysis. */
6066 if (!PURE_SLP_STMT (stmt_info
))
6067 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
,
6072 if (dump_enabled_p ())
6073 dump_printf_loc (MSG_NOTE
, vect_location
,
6074 "transform load. ncopies = %d\n", ncopies
);
6078 ensure_base_align (stmt_info
, dr
);
6080 if (STMT_VINFO_GATHER_P (stmt_info
))
6082 tree vec_oprnd0
= NULL_TREE
, op
;
6083 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
6084 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6085 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6086 edge pe
= loop_preheader_edge (loop
);
6089 enum { NARROW
, NONE
, WIDEN
} modifier
;
6090 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
6092 if (nunits
== gather_off_nunits
)
6094 else if (nunits
== gather_off_nunits
/ 2)
6096 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6099 for (i
= 0; i
< gather_off_nunits
; ++i
)
6100 sel
[i
] = i
| nunits
;
6102 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
6104 else if (nunits
== gather_off_nunits
* 2)
6106 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6109 for (i
= 0; i
< nunits
; ++i
)
6110 sel
[i
] = i
< gather_off_nunits
6111 ? i
: i
+ nunits
- gather_off_nunits
;
6113 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6119 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
6120 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6121 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6122 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6123 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6124 scaletype
= TREE_VALUE (arglist
);
6125 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6127 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6129 ptr
= fold_convert (ptrtype
, gather_base
);
6130 if (!is_gimple_min_invariant (ptr
))
6132 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6133 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6134 gcc_assert (!new_bb
);
6137 /* Currently we support only unconditional gather loads,
6138 so mask should be all ones. */
6139 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6140 mask
= build_int_cst (masktype
, -1);
6141 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6143 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6144 mask
= build_vector_from_val (masktype
, mask
);
6145 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6147 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6151 for (j
= 0; j
< 6; ++j
)
6153 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6154 mask
= build_real (TREE_TYPE (masktype
), r
);
6155 mask
= build_vector_from_val (masktype
, mask
);
6156 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6161 scale
= build_int_cst (scaletype
, gather_scale
);
6163 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6164 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6165 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6169 for (j
= 0; j
< 6; ++j
)
6171 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6172 merge
= build_real (TREE_TYPE (rettype
), r
);
6176 merge
= build_vector_from_val (rettype
, merge
);
6177 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6179 prev_stmt_info
= NULL
;
6180 for (j
= 0; j
< ncopies
; ++j
)
6182 if (modifier
== WIDEN
&& (j
& 1))
6183 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6184 perm_mask
, stmt
, gsi
);
6187 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
6190 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
6192 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6194 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6195 == TYPE_VECTOR_SUBPARTS (idxtype
));
6196 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
6197 var
= make_ssa_name (var
);
6198 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6200 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6201 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6206 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
6208 if (!useless_type_conversion_p (vectype
, rettype
))
6210 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6211 == TYPE_VECTOR_SUBPARTS (rettype
));
6212 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
6213 op
= make_ssa_name (var
, new_stmt
);
6214 gimple_call_set_lhs (new_stmt
, op
);
6215 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6216 var
= make_ssa_name (vec_dest
);
6217 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6219 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6223 var
= make_ssa_name (vec_dest
, new_stmt
);
6224 gimple_call_set_lhs (new_stmt
, var
);
6227 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6229 if (modifier
== NARROW
)
6236 var
= permute_vec_elements (prev_res
, var
,
6237 perm_mask
, stmt
, gsi
);
6238 new_stmt
= SSA_NAME_DEF_STMT (var
);
6241 if (prev_stmt_info
== NULL
)
6242 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6244 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6245 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6249 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6251 gimple_stmt_iterator incr_gsi
;
6257 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6258 gimple_seq stmts
= NULL
;
6259 tree stride_base
, stride_step
, alias_off
;
6261 gcc_assert (!nested_in_vect_loop
);
6264 = fold_build_pointer_plus
6265 (unshare_expr (DR_BASE_ADDRESS (dr
)),
6266 size_binop (PLUS_EXPR
,
6267 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
6268 convert_to_ptrofftype (DR_INIT (dr
))));
6269 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
6271 /* For a load with loop-invariant (but other than power-of-2)
6272 stride (i.e. not a grouped access) like so:
6274 for (i = 0; i < n; i += stride)
6277 we generate a new induction variable and new accesses to
6278 form a new vector (or vectors, depending on ncopies):
6280 for (j = 0; ; j += VF*stride)
6282 tmp2 = array[j + stride];
6284 vectemp = {tmp1, tmp2, ...}
6287 ivstep
= stride_step
;
6288 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6289 build_int_cst (TREE_TYPE (ivstep
), vf
));
6291 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6293 create_iv (stride_base
, ivstep
, NULL
,
6294 loop
, &incr_gsi
, insert_after
,
6296 incr
= gsi_stmt (incr_gsi
);
6297 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
6299 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6301 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6303 prev_stmt_info
= NULL
;
6304 running_off
= offvar
;
6305 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
6306 int nloads
= nunits
;
6307 tree ltype
= TREE_TYPE (vectype
);
6310 nloads
= nunits
/ group_size
;
6311 if (group_size
< nunits
)
6312 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
6315 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
6316 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6317 gcc_assert (!slp_perm
);
6319 for (j
= 0; j
< ncopies
; j
++)
6325 vec_alloc (v
, nloads
);
6326 for (i
= 0; i
< nloads
; i
++)
6328 tree newref
, newoff
;
6330 newref
= build2 (MEM_REF
, ltype
, running_off
, alias_off
);
6332 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6335 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6336 newoff
= copy_ssa_name (running_off
);
6337 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6338 running_off
, stride_step
);
6339 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6341 running_off
= newoff
;
6344 vec_inv
= build_constructor (vectype
, v
);
6345 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6346 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6350 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
6351 build2 (MEM_REF
, ltype
,
6352 running_off
, alias_off
));
6353 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6355 tree newoff
= copy_ssa_name (running_off
);
6356 gimple incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6357 running_off
, stride_step
);
6358 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6360 running_off
= newoff
;
6364 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6366 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6368 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6369 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6376 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6378 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
6379 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
6380 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6382 /* Check if the chain of loads is already vectorized. */
6383 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6384 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6385 ??? But we can only do so if there is exactly one
6386 as we have no way to get at the rest. Leave the CSE
6388 ??? With the group load eventually participating
6389 in multiple different permutations (having multiple
6390 slp nodes which refer to the same group) the CSE
6391 is even wrong code. See PR56270. */
6394 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6397 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6398 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6400 /* VEC_NUM is the number of vect stmts to be created for this group. */
6403 grouped_load
= false;
6404 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6405 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
6409 vec_num
= group_size
;
6417 group_size
= vec_num
= 1;
6421 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6422 gcc_assert (alignment_support_scheme
);
6423 /* Targets with load-lane instructions must not require explicit
6425 gcc_assert (!load_lanes_p
6426 || alignment_support_scheme
== dr_aligned
6427 || alignment_support_scheme
== dr_unaligned_supported
);
6429 /* In case the vectorization factor (VF) is bigger than the number
6430 of elements that we can fit in a vectype (nunits), we have to generate
6431 more than one vector stmt - i.e - we need to "unroll" the
6432 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6433 from one copy of the vector stmt to the next, in the field
6434 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6435 stages to find the correct vector defs to be used when vectorizing
6436 stmts that use the defs of the current stmt. The example below
6437 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6438 need to create 4 vectorized stmts):
6440 before vectorization:
6441 RELATED_STMT VEC_STMT
6445 step 1: vectorize stmt S1:
6446 We first create the vector stmt VS1_0, and, as usual, record a
6447 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6448 Next, we create the vector stmt VS1_1, and record a pointer to
6449 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6450 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6452 RELATED_STMT VEC_STMT
6453 VS1_0: vx0 = memref0 VS1_1 -
6454 VS1_1: vx1 = memref1 VS1_2 -
6455 VS1_2: vx2 = memref2 VS1_3 -
6456 VS1_3: vx3 = memref3 - -
6457 S1: x = load - VS1_0
6460 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6461 information we recorded in RELATED_STMT field is used to vectorize
6464 /* In case of interleaving (non-unit grouped access):
6471 Vectorized loads are created in the order of memory accesses
6472 starting from the access of the first stmt of the chain:
6475 VS2: vx1 = &base + vec_size*1
6476 VS3: vx3 = &base + vec_size*2
6477 VS4: vx4 = &base + vec_size*3
6479 Then permutation statements are generated:
6481 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6482 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6485 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6486 (the order of the data-refs in the output of vect_permute_load_chain
6487 corresponds to the order of scalar stmts in the interleaving chain - see
6488 the documentation of vect_permute_load_chain()).
6489 The generation of permutation stmts and recording them in
6490 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6492 In case of both multiple types and interleaving, the vector loads and
6493 permutation stmts above are created for every copy. The result vector
6494 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6495 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6497 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6498 on a target that supports unaligned accesses (dr_unaligned_supported)
6499 we generate the following code:
6503 p = p + indx * vectype_size;
6508 Otherwise, the data reference is potentially unaligned on a target that
6509 does not support unaligned accesses (dr_explicit_realign_optimized) -
6510 then generate the following code, in which the data in each iteration is
6511 obtained by two vector loads, one from the previous iteration, and one
6512 from the current iteration:
6514 msq_init = *(floor(p1))
6515 p2 = initial_addr + VS - 1;
6516 realignment_token = call target_builtin;
6519 p2 = p2 + indx * vectype_size
6521 vec_dest = realign_load (msq, lsq, realignment_token)
6526 /* If the misalignment remains the same throughout the execution of the
6527 loop, we can create the init_addr and permutation mask at the loop
6528 preheader. Otherwise, it needs to be created inside the loop.
6529 This can only occur when vectorizing memory accesses in the inner-loop
6530 nested within an outer-loop that is being vectorized. */
6532 if (nested_in_vect_loop
6533 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6534 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6536 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6537 compute_in_loop
= true;
6540 if ((alignment_support_scheme
== dr_explicit_realign_optimized
6541 || alignment_support_scheme
== dr_explicit_realign
)
6542 && !compute_in_loop
)
6544 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
6545 alignment_support_scheme
, NULL_TREE
,
6547 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6549 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
6550 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
6558 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6561 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6563 aggr_type
= vectype
;
6565 prev_stmt_info
= NULL
;
6566 for (j
= 0; j
< ncopies
; j
++)
6568 /* 1. Create the vector or array pointer update chain. */
6571 bool simd_lane_access_p
6572 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6573 if (simd_lane_access_p
6574 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6575 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6576 && integer_zerop (DR_OFFSET (first_dr
))
6577 && integer_zerop (DR_INIT (first_dr
))
6578 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6579 get_alias_set (DR_REF (first_dr
)))
6580 && (alignment_support_scheme
== dr_aligned
6581 || alignment_support_scheme
== dr_unaligned_supported
))
6583 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6584 dataref_offset
= build_int_cst (reference_alias_ptr_type
6585 (DR_REF (first_dr
)), 0);
6590 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
6591 offset
, &dummy
, gsi
, &ptr_incr
,
6592 simd_lane_access_p
, &inv_p
,
6595 else if (dataref_offset
)
6596 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
6597 TYPE_SIZE_UNIT (aggr_type
));
6599 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6600 TYPE_SIZE_UNIT (aggr_type
));
6602 if (grouped_load
|| slp_perm
)
6603 dr_chain
.create (vec_num
);
6609 vec_array
= create_vector_array (vectype
, vec_num
);
6612 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6613 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
6614 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
6615 gimple_call_set_lhs (new_stmt
, vec_array
);
6616 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6618 /* Extract each vector into an SSA_NAME. */
6619 for (i
= 0; i
< vec_num
; i
++)
6621 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
6623 dr_chain
.quick_push (new_temp
);
6626 /* Record the mapping between SSA_NAMEs and statements. */
6627 vect_record_grouped_load_vectors (stmt
, dr_chain
);
6631 for (i
= 0; i
< vec_num
; i
++)
6634 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6637 /* 2. Create the vector-load in the loop. */
6638 switch (alignment_support_scheme
)
6641 case dr_unaligned_supported
:
6643 unsigned int align
, misalign
;
6646 = build2 (MEM_REF
, vectype
, dataref_ptr
,
6649 : build_int_cst (reference_alias_ptr_type
6650 (DR_REF (first_dr
)), 0));
6651 align
= TYPE_ALIGN_UNIT (vectype
);
6652 if (alignment_support_scheme
== dr_aligned
)
6654 gcc_assert (aligned_access_p (first_dr
));
6657 else if (DR_MISALIGNMENT (first_dr
) == -1)
6659 TREE_TYPE (data_ref
)
6660 = build_aligned_type (TREE_TYPE (data_ref
),
6661 TYPE_ALIGN (elem_type
));
6662 align
= TYPE_ALIGN_UNIT (elem_type
);
6667 TREE_TYPE (data_ref
)
6668 = build_aligned_type (TREE_TYPE (data_ref
),
6669 TYPE_ALIGN (elem_type
));
6670 misalign
= DR_MISALIGNMENT (first_dr
);
6672 if (dataref_offset
== NULL_TREE
)
6673 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
6677 case dr_explicit_realign
:
6681 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
6683 if (compute_in_loop
)
6684 msq
= vect_setup_realignment (first_stmt
, gsi
,
6686 dr_explicit_realign
,
6689 ptr
= copy_ssa_name (dataref_ptr
);
6690 new_stmt
= gimple_build_assign
6691 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
6693 (TREE_TYPE (dataref_ptr
),
6694 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6695 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6697 = build2 (MEM_REF
, vectype
, ptr
,
6698 build_int_cst (reference_alias_ptr_type
6699 (DR_REF (first_dr
)), 0));
6700 vec_dest
= vect_create_destination_var (scalar_dest
,
6702 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6703 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6704 gimple_assign_set_lhs (new_stmt
, new_temp
);
6705 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
6706 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
6707 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6710 bump
= size_binop (MULT_EXPR
, vs
,
6711 TYPE_SIZE_UNIT (elem_type
));
6712 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
6713 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
6714 new_stmt
= gimple_build_assign
6715 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
6718 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6719 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
6720 gimple_assign_set_lhs (new_stmt
, ptr
);
6721 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6723 = build2 (MEM_REF
, vectype
, ptr
,
6724 build_int_cst (reference_alias_ptr_type
6725 (DR_REF (first_dr
)), 0));
6728 case dr_explicit_realign_optimized
:
6729 new_temp
= copy_ssa_name (dataref_ptr
);
6730 new_stmt
= gimple_build_assign
6731 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
6733 (TREE_TYPE (dataref_ptr
),
6734 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6735 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6737 = build2 (MEM_REF
, vectype
, new_temp
,
6738 build_int_cst (reference_alias_ptr_type
6739 (DR_REF (first_dr
)), 0));
6744 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6745 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6746 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6747 gimple_assign_set_lhs (new_stmt
, new_temp
);
6748 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6750 /* 3. Handle explicit realignment if necessary/supported.
6752 vec_dest = realign_load (msq, lsq, realignment_token) */
6753 if (alignment_support_scheme
== dr_explicit_realign_optimized
6754 || alignment_support_scheme
== dr_explicit_realign
)
6756 lsq
= gimple_assign_lhs (new_stmt
);
6757 if (!realignment_token
)
6758 realignment_token
= dataref_ptr
;
6759 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6760 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
6761 msq
, lsq
, realignment_token
);
6762 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6763 gimple_assign_set_lhs (new_stmt
, new_temp
);
6764 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6766 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6769 if (i
== vec_num
- 1 && j
== ncopies
- 1)
6770 add_phi_arg (phi
, lsq
,
6771 loop_latch_edge (containing_loop
),
6777 /* 4. Handle invariant-load. */
6778 if (inv_p
&& !bb_vinfo
)
6780 gcc_assert (!grouped_load
);
6781 /* If we have versioned for aliasing or the loop doesn't
6782 have any data dependencies that would preclude this,
6783 then we are sure this is a loop invariant load and
6784 thus we can insert it on the preheader edge. */
6785 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
6786 && !nested_in_vect_loop
6787 && hoist_defs_of_uses (stmt
, loop
))
6789 if (dump_enabled_p ())
6791 dump_printf_loc (MSG_NOTE
, vect_location
,
6792 "hoisting out of the vectorized "
6794 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6796 tree tem
= copy_ssa_name (scalar_dest
);
6797 gsi_insert_on_edge_immediate
6798 (loop_preheader_edge (loop
),
6799 gimple_build_assign (tem
,
6801 (gimple_assign_rhs1 (stmt
))));
6802 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
6806 gimple_stmt_iterator gsi2
= *gsi
;
6808 new_temp
= vect_init_vector (stmt
, scalar_dest
,
6811 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6812 set_vinfo_for_stmt (new_stmt
,
6813 new_stmt_vec_info (new_stmt
, loop_vinfo
,
6819 tree perm_mask
= perm_mask_for_reverse (vectype
);
6820 new_temp
= permute_vec_elements (new_temp
, new_temp
,
6821 perm_mask
, stmt
, gsi
);
6822 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6825 /* Collect vector loads and later create their permutation in
6826 vect_transform_grouped_load (). */
6827 if (grouped_load
|| slp_perm
)
6828 dr_chain
.quick_push (new_temp
);
6830 /* Store vector loads in the corresponding SLP_NODE. */
6831 if (slp
&& !slp_perm
)
6832 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6834 /* Bump the vector pointer to account for a gap. */
6835 if (slp
&& group_gap
!= 0)
6837 tree bump
= size_binop (MULT_EXPR
,
6838 TYPE_SIZE_UNIT (elem_type
),
6839 size_int (group_gap
));
6840 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6845 if (slp
&& !slp_perm
)
6850 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6851 slp_node_instance
, false))
6853 dr_chain
.release ();
6862 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
6863 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6868 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6870 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6871 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6874 dr_chain
.release ();
6880 /* Function vect_is_simple_cond.
6883 LOOP - the loop that is being vectorized.
6884 COND - Condition that is checked for simple use.
6887 *COMP_VECTYPE - the vector type for the comparison.
6889 Returns whether a COND can be vectorized. Checks whether
6890 condition operands are supportable using vec_is_simple_use. */
6893 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
6894 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
6898 enum vect_def_type dt
;
6899 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
6901 if (!COMPARISON_CLASS_P (cond
))
6904 lhs
= TREE_OPERAND (cond
, 0);
6905 rhs
= TREE_OPERAND (cond
, 1);
6907 if (TREE_CODE (lhs
) == SSA_NAME
)
6909 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
6910 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
6911 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
6914 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
6915 && TREE_CODE (lhs
) != FIXED_CST
)
6918 if (TREE_CODE (rhs
) == SSA_NAME
)
6920 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6921 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
6922 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
6925 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
6926 && TREE_CODE (rhs
) != FIXED_CST
)
6929 *comp_vectype
= vectype1
? vectype1
: vectype2
;
6933 /* vectorizable_condition.
6935 Check if STMT is conditional modify expression that can be vectorized.
6936 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6937 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6940 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6941 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6942 else caluse if it is 2).
6944 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6947 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
6948 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
6951 tree scalar_dest
= NULL_TREE
;
6952 tree vec_dest
= NULL_TREE
;
6953 tree cond_expr
, then_clause
, else_clause
;
6954 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6955 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6956 tree comp_vectype
= NULL_TREE
;
6957 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
6958 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
6959 tree vec_compare
, vec_cond_expr
;
6961 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6963 enum vect_def_type dt
, dts
[4];
6964 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6966 enum tree_code code
;
6967 stmt_vec_info prev_stmt_info
= NULL
;
6969 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6970 vec
<tree
> vec_oprnds0
= vNULL
;
6971 vec
<tree
> vec_oprnds1
= vNULL
;
6972 vec
<tree
> vec_oprnds2
= vNULL
;
6973 vec
<tree
> vec_oprnds3
= vNULL
;
6976 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
6979 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6981 gcc_assert (ncopies
>= 1);
6982 if (reduc_index
&& ncopies
> 1)
6983 return false; /* FORNOW */
6985 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
6988 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6991 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6992 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
6996 /* FORNOW: not yet supported. */
6997 if (STMT_VINFO_LIVE_P (stmt_info
))
6999 if (dump_enabled_p ())
7000 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7001 "value used after loop.\n");
7005 /* Is vectorizable conditional operation? */
7006 if (!is_gimple_assign (stmt
))
7009 code
= gimple_assign_rhs_code (stmt
);
7011 if (code
!= COND_EXPR
)
7014 cond_expr
= gimple_assign_rhs1 (stmt
);
7015 then_clause
= gimple_assign_rhs2 (stmt
);
7016 else_clause
= gimple_assign_rhs3 (stmt
);
7018 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
7023 if (TREE_CODE (then_clause
) == SSA_NAME
)
7025 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
7026 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
7027 &then_def_stmt
, &def
, &dt
))
7030 else if (TREE_CODE (then_clause
) != INTEGER_CST
7031 && TREE_CODE (then_clause
) != REAL_CST
7032 && TREE_CODE (then_clause
) != FIXED_CST
)
7035 if (TREE_CODE (else_clause
) == SSA_NAME
)
7037 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
7038 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
7039 &else_def_stmt
, &def
, &dt
))
7042 else if (TREE_CODE (else_clause
) != INTEGER_CST
7043 && TREE_CODE (else_clause
) != REAL_CST
7044 && TREE_CODE (else_clause
) != FIXED_CST
)
7047 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
7048 /* The result of a vector comparison should be signed type. */
7049 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
7050 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
7051 if (vec_cmp_type
== NULL_TREE
)
7056 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7057 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
7064 vec_oprnds0
.create (1);
7065 vec_oprnds1
.create (1);
7066 vec_oprnds2
.create (1);
7067 vec_oprnds3
.create (1);
7071 scalar_dest
= gimple_assign_lhs (stmt
);
7072 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7074 /* Handle cond expr. */
7075 for (j
= 0; j
< ncopies
; j
++)
7077 gassign
*new_stmt
= NULL
;
7082 auto_vec
<tree
, 4> ops
;
7083 auto_vec
<vec
<tree
>, 4> vec_defs
;
7085 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
7086 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
7087 ops
.safe_push (then_clause
);
7088 ops
.safe_push (else_clause
);
7089 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7090 vec_oprnds3
= vec_defs
.pop ();
7091 vec_oprnds2
= vec_defs
.pop ();
7092 vec_oprnds1
= vec_defs
.pop ();
7093 vec_oprnds0
= vec_defs
.pop ();
7096 vec_defs
.release ();
7102 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
7104 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
7105 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
7108 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
7110 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
7111 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
7112 if (reduc_index
== 1)
7113 vec_then_clause
= reduc_def
;
7116 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7118 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
7119 NULL
, >emp
, &def
, &dts
[2]);
7121 if (reduc_index
== 2)
7122 vec_else_clause
= reduc_def
;
7125 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7127 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
7128 NULL
, >emp
, &def
, &dts
[3]);
7134 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
7135 vec_oprnds0
.pop ());
7136 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
7137 vec_oprnds1
.pop ());
7138 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7139 vec_oprnds2
.pop ());
7140 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7141 vec_oprnds3
.pop ());
7146 vec_oprnds0
.quick_push (vec_cond_lhs
);
7147 vec_oprnds1
.quick_push (vec_cond_rhs
);
7148 vec_oprnds2
.quick_push (vec_then_clause
);
7149 vec_oprnds3
.quick_push (vec_else_clause
);
7152 /* Arguments are ready. Create the new vector stmt. */
7153 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
7155 vec_cond_rhs
= vec_oprnds1
[i
];
7156 vec_then_clause
= vec_oprnds2
[i
];
7157 vec_else_clause
= vec_oprnds3
[i
];
7159 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
7160 vec_cond_lhs
, vec_cond_rhs
);
7161 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
7162 vec_compare
, vec_then_clause
, vec_else_clause
);
7164 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
7165 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7166 gimple_assign_set_lhs (new_stmt
, new_temp
);
7167 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7169 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7176 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7178 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7180 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7183 vec_oprnds0
.release ();
7184 vec_oprnds1
.release ();
7185 vec_oprnds2
.release ();
7186 vec_oprnds3
.release ();
7192 /* Make sure the statement is vectorizable. */
7195 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
7197 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7198 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7199 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
7201 tree scalar_type
, vectype
;
7202 gimple pattern_stmt
;
7203 gimple_seq pattern_def_seq
;
7205 if (dump_enabled_p ())
7207 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
7208 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7211 if (gimple_has_volatile_ops (stmt
))
7213 if (dump_enabled_p ())
7214 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7215 "not vectorized: stmt has volatile operands\n");
7220 /* Skip stmts that do not need to be vectorized. In loops this is expected
7222 - the COND_EXPR which is the loop exit condition
7223 - any LABEL_EXPRs in the loop
7224 - computations that are used only for array indexing or loop control.
7225 In basic blocks we only analyze statements that are a part of some SLP
7226 instance, therefore, all the statements are relevant.
7228 Pattern statement needs to be analyzed instead of the original statement
7229 if the original statement is not relevant. Otherwise, we analyze both
7230 statements. In basic blocks we are called from some SLP instance
7231 traversal, don't analyze pattern stmts instead, the pattern stmts
7232 already will be part of SLP instance. */
7234 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
7235 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
7236 && !STMT_VINFO_LIVE_P (stmt_info
))
7238 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7240 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7241 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7243 /* Analyze PATTERN_STMT instead of the original stmt. */
7244 stmt
= pattern_stmt
;
7245 stmt_info
= vinfo_for_stmt (pattern_stmt
);
7246 if (dump_enabled_p ())
7248 dump_printf_loc (MSG_NOTE
, vect_location
,
7249 "==> examining pattern statement: ");
7250 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7255 if (dump_enabled_p ())
7256 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
7261 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7264 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7265 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7267 /* Analyze PATTERN_STMT too. */
7268 if (dump_enabled_p ())
7270 dump_printf_loc (MSG_NOTE
, vect_location
,
7271 "==> examining pattern statement: ");
7272 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7275 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
7279 if (is_pattern_stmt_p (stmt_info
)
7281 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
7283 gimple_stmt_iterator si
;
7285 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
7287 gimple pattern_def_stmt
= gsi_stmt (si
);
7288 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
7289 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
7291 /* Analyze def stmt of STMT if it's a pattern stmt. */
7292 if (dump_enabled_p ())
7294 dump_printf_loc (MSG_NOTE
, vect_location
,
7295 "==> examining pattern def statement: ");
7296 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
7299 if (!vect_analyze_stmt (pattern_def_stmt
,
7300 need_to_vectorize
, node
))
7306 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
7308 case vect_internal_def
:
7311 case vect_reduction_def
:
7312 case vect_nested_cycle
:
7313 gcc_assert (!bb_vinfo
7314 && (relevance
== vect_used_in_outer
7315 || relevance
== vect_used_in_outer_by_reduction
7316 || relevance
== vect_used_by_reduction
7317 || relevance
== vect_unused_in_scope
));
7320 case vect_induction_def
:
7321 case vect_constant_def
:
7322 case vect_external_def
:
7323 case vect_unknown_def_type
:
7330 gcc_assert (PURE_SLP_STMT (stmt_info
));
7332 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
7333 if (dump_enabled_p ())
7335 dump_printf_loc (MSG_NOTE
, vect_location
,
7336 "get vectype for scalar type: ");
7337 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
7338 dump_printf (MSG_NOTE
, "\n");
7341 vectype
= get_vectype_for_scalar_type (scalar_type
);
7344 if (dump_enabled_p ())
7346 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7347 "not SLPed: unsupported data-type ");
7348 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
7350 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7355 if (dump_enabled_p ())
7357 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
7358 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
7359 dump_printf (MSG_NOTE
, "\n");
7362 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
7365 if (STMT_VINFO_RELEVANT_P (stmt_info
))
7367 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
7368 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
7369 || (is_gimple_call (stmt
)
7370 && gimple_call_lhs (stmt
) == NULL_TREE
));
7371 *need_to_vectorize
= true;
7374 if (PURE_SLP_STMT (stmt_info
) && !node
)
7376 dump_printf_loc (MSG_NOTE
, vect_location
,
7377 "handled only by SLP analysis\n");
7383 && (STMT_VINFO_RELEVANT_P (stmt_info
)
7384 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
7385 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7386 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7387 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7388 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7389 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7390 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7391 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7392 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7393 || vectorizable_reduction (stmt
, NULL
, NULL
, node
)
7394 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7398 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7399 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7400 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7401 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7402 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7403 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7404 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7405 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7406 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7411 if (dump_enabled_p ())
7413 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7414 "not vectorized: relevant stmt not ");
7415 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7416 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7425 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7426 need extra handling, except for vectorizable reductions. */
7427 if (STMT_VINFO_LIVE_P (stmt_info
)
7428 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7429 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
7433 if (dump_enabled_p ())
7435 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7436 "not vectorized: live stmt not ");
7437 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7438 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7448 /* Function vect_transform_stmt.
7450 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7453 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
7454 bool *grouped_store
, slp_tree slp_node
,
7455 slp_instance slp_node_instance
)
7457 bool is_store
= false;
7458 gimple vec_stmt
= NULL
;
7459 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7462 switch (STMT_VINFO_TYPE (stmt_info
))
7464 case type_demotion_vec_info_type
:
7465 case type_promotion_vec_info_type
:
7466 case type_conversion_vec_info_type
:
7467 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
7471 case induc_vec_info_type
:
7472 gcc_assert (!slp_node
);
7473 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
7477 case shift_vec_info_type
:
7478 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
7482 case op_vec_info_type
:
7483 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
7487 case assignment_vec_info_type
:
7488 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
7492 case load_vec_info_type
:
7493 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
7498 case store_vec_info_type
:
7499 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
7501 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
7503 /* In case of interleaving, the whole chain is vectorized when the
7504 last store in the chain is reached. Store stmts before the last
7505 one are skipped, and there vec_stmt_info shouldn't be freed
7507 *grouped_store
= true;
7508 if (STMT_VINFO_VEC_STMT (stmt_info
))
7515 case condition_vec_info_type
:
7516 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
7520 case call_vec_info_type
:
7521 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7522 stmt
= gsi_stmt (*gsi
);
7523 if (is_gimple_call (stmt
)
7524 && gimple_call_internal_p (stmt
)
7525 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
7529 case call_simd_clone_vec_info_type
:
7530 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7531 stmt
= gsi_stmt (*gsi
);
7534 case reduc_vec_info_type
:
7535 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
7540 if (!STMT_VINFO_LIVE_P (stmt_info
))
7542 if (dump_enabled_p ())
7543 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7544 "stmt not supported.\n");
7549 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7550 is being vectorized, but outside the immediately enclosing loop. */
7552 && STMT_VINFO_LOOP_VINFO (stmt_info
)
7553 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7554 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
7555 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
7556 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
7557 || STMT_VINFO_RELEVANT (stmt_info
) ==
7558 vect_used_in_outer_by_reduction
))
7560 struct loop
*innerloop
= LOOP_VINFO_LOOP (
7561 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
7562 imm_use_iterator imm_iter
;
7563 use_operand_p use_p
;
7567 if (dump_enabled_p ())
7568 dump_printf_loc (MSG_NOTE
, vect_location
,
7569 "Record the vdef for outer-loop vectorization.\n");
7571 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
7572 (to be used when vectorizing outer-loop stmts that use the DEF of
7574 if (gimple_code (stmt
) == GIMPLE_PHI
)
7575 scalar_dest
= PHI_RESULT (stmt
);
7577 scalar_dest
= gimple_assign_lhs (stmt
);
7579 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
7581 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
7583 exit_phi
= USE_STMT (use_p
);
7584 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
7589 /* Handle stmts whose DEF is used outside the loop-nest that is
7590 being vectorized. */
7591 if (STMT_VINFO_LIVE_P (stmt_info
)
7592 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7594 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
7599 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
7605 /* Remove a group of stores (for SLP or interleaving), free their
7609 vect_remove_stores (gimple first_stmt
)
7611 gimple next
= first_stmt
;
7613 gimple_stmt_iterator next_si
;
7617 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
7619 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
7620 if (is_pattern_stmt_p (stmt_info
))
7621 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
7622 /* Free the attached stmt_vec_info and remove the stmt. */
7623 next_si
= gsi_for_stmt (next
);
7624 unlink_stmt_vdef (next
);
7625 gsi_remove (&next_si
, true);
7626 release_defs (next
);
7627 free_stmt_vec_info (next
);
7633 /* Function new_stmt_vec_info.
7635 Create and initialize a new stmt_vec_info struct for STMT. */
7638 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
7639 bb_vec_info bb_vinfo
)
7642 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
7644 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
7645 STMT_VINFO_STMT (res
) = stmt
;
7646 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
7647 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
7648 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
7649 STMT_VINFO_LIVE_P (res
) = false;
7650 STMT_VINFO_VECTYPE (res
) = NULL
;
7651 STMT_VINFO_VEC_STMT (res
) = NULL
;
7652 STMT_VINFO_VECTORIZABLE (res
) = true;
7653 STMT_VINFO_IN_PATTERN_P (res
) = false;
7654 STMT_VINFO_RELATED_STMT (res
) = NULL
;
7655 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
7656 STMT_VINFO_DATA_REF (res
) = NULL
;
7658 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
7659 STMT_VINFO_DR_OFFSET (res
) = NULL
;
7660 STMT_VINFO_DR_INIT (res
) = NULL
;
7661 STMT_VINFO_DR_STEP (res
) = NULL
;
7662 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
7664 if (gimple_code (stmt
) == GIMPLE_PHI
7665 && is_loop_header_bb_p (gimple_bb (stmt
)))
7666 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
7668 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
7670 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
7671 STMT_SLP_TYPE (res
) = loop_vect
;
7672 GROUP_FIRST_ELEMENT (res
) = NULL
;
7673 GROUP_NEXT_ELEMENT (res
) = NULL
;
7674 GROUP_SIZE (res
) = 0;
7675 GROUP_STORE_COUNT (res
) = 0;
7676 GROUP_GAP (res
) = 0;
7677 GROUP_SAME_DR_STMT (res
) = NULL
;
7683 /* Create a hash table for stmt_vec_info. */
7686 init_stmt_vec_info_vec (void)
7688 gcc_assert (!stmt_vec_info_vec
.exists ());
7689 stmt_vec_info_vec
.create (50);
7693 /* Free hash table for stmt_vec_info. */
7696 free_stmt_vec_info_vec (void)
7700 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
7702 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
7703 gcc_assert (stmt_vec_info_vec
.exists ());
7704 stmt_vec_info_vec
.release ();
7708 /* Free stmt vectorization related info. */
7711 free_stmt_vec_info (gimple stmt
)
7713 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7718 /* Check if this statement has a related "pattern stmt"
7719 (introduced by the vectorizer during the pattern recognition
7720 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7722 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
7724 stmt_vec_info patt_info
7725 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7728 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
7729 gimple patt_stmt
= STMT_VINFO_STMT (patt_info
);
7730 gimple_set_bb (patt_stmt
, NULL
);
7731 tree lhs
= gimple_get_lhs (patt_stmt
);
7732 if (TREE_CODE (lhs
) == SSA_NAME
)
7733 release_ssa_name (lhs
);
7736 gimple_stmt_iterator si
;
7737 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
7739 gimple seq_stmt
= gsi_stmt (si
);
7740 gimple_set_bb (seq_stmt
, NULL
);
7741 lhs
= gimple_get_lhs (patt_stmt
);
7742 if (TREE_CODE (lhs
) == SSA_NAME
)
7743 release_ssa_name (lhs
);
7744 free_stmt_vec_info (seq_stmt
);
7747 free_stmt_vec_info (patt_stmt
);
7751 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
7752 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
7753 set_vinfo_for_stmt (stmt
, NULL
);
7758 /* Function get_vectype_for_scalar_type_and_size.
7760 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7764 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
7766 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
7767 machine_mode simd_mode
;
7768 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
7775 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
7776 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
7779 /* For vector types of elements whose mode precision doesn't
7780 match their types precision we use a element type of mode
7781 precision. The vectorization routines will have to make sure
7782 they support the proper result truncation/extension.
7783 We also make sure to build vector types with INTEGER_TYPE
7784 component type only. */
7785 if (INTEGRAL_TYPE_P (scalar_type
)
7786 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
7787 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
7788 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
7789 TYPE_UNSIGNED (scalar_type
));
7791 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7792 When the component mode passes the above test simply use a type
7793 corresponding to that mode. The theory is that any use that
7794 would cause problems with this will disable vectorization anyway. */
7795 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
7796 && !INTEGRAL_TYPE_P (scalar_type
))
7797 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
7799 /* We can't build a vector type of elements with alignment bigger than
7801 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
7802 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
7803 TYPE_UNSIGNED (scalar_type
));
7805 /* If we felt back to using the mode fail if there was
7806 no scalar type for it. */
7807 if (scalar_type
== NULL_TREE
)
7810 /* If no size was supplied use the mode the target prefers. Otherwise
7811 lookup a vector mode of the specified size. */
7813 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
7815 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
7816 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
7820 vectype
= build_vector_type (scalar_type
, nunits
);
7822 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7823 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
/* Vector size (in bytes) currently in use for vectorization; zero until
   set by the first successful get_vectype_for_scalar_type call.  */
unsigned int current_vector_size;
7831 /* Function get_vectype_for_scalar_type.
7833 Returns the vector type corresponding to SCALAR_TYPE as supported
7837 get_vectype_for_scalar_type (tree scalar_type
)
7840 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
7841 current_vector_size
);
7843 && current_vector_size
== 0)
7844 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
7848 /* Function get_same_sized_vectype
7850 Returns a vector type corresponding to SCALAR_TYPE of size
7851 VECTOR_TYPE if supported by the target. */
7854 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
7856 return get_vectype_for_scalar_type_and_size
7857 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
7860 /* Function vect_is_simple_use.
7863 LOOP_VINFO - the vect info of the loop that is being vectorized.
7864 BB_VINFO - the vect info of the basic block that is being vectorized.
7865 OPERAND - operand of STMT in the loop or bb.
7866 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7868 Returns whether a stmt with OPERAND can be vectorized.
7869 For loops, supportable operands are constants, loop invariants, and operands
7870 that are defined by the current iteration of the loop. Unsupportable
7871 operands are those that are defined by a previous iteration of the loop (as
7872 is the case in reduction/induction computations).
7873 For basic blocks, supportable operands are constants and bb invariants.
7874 For now, operands defined outside the basic block are not supported. */
7877 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7878 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7879 tree
*def
, enum vect_def_type
*dt
)
7882 stmt_vec_info stmt_vinfo
;
7883 struct loop
*loop
= NULL
;
7886 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7891 if (dump_enabled_p ())
7893 dump_printf_loc (MSG_NOTE
, vect_location
,
7894 "vect_is_simple_use: operand ");
7895 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
7896 dump_printf (MSG_NOTE
, "\n");
7899 if (CONSTANT_CLASS_P (operand
))
7901 *dt
= vect_constant_def
;
7905 if (is_gimple_min_invariant (operand
))
7908 *dt
= vect_external_def
;
7912 if (TREE_CODE (operand
) == PAREN_EXPR
)
7914 if (dump_enabled_p ())
7915 dump_printf_loc (MSG_NOTE
, vect_location
, "non-associatable copy.\n");
7916 operand
= TREE_OPERAND (operand
, 0);
7919 if (TREE_CODE (operand
) != SSA_NAME
)
7921 if (dump_enabled_p ())
7922 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7927 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
7928 if (*def_stmt
== NULL
)
7930 if (dump_enabled_p ())
7931 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7936 if (dump_enabled_p ())
7938 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
7939 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
7942 /* Empty stmt is expected only in case of a function argument.
7943 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
7944 if (gimple_nop_p (*def_stmt
))
7947 *dt
= vect_external_def
;
7951 bb
= gimple_bb (*def_stmt
);
7953 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
7954 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
7955 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
7956 *dt
= vect_external_def
;
7959 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
7960 if (!loop
&& !STMT_VINFO_VECTORIZABLE (stmt_vinfo
))
7961 *dt
= vect_external_def
;
7963 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
7966 if (dump_enabled_p ())
7968 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
7971 case vect_uninitialized_def
:
7972 dump_printf (MSG_NOTE
, "uninitialized\n");
7974 case vect_constant_def
:
7975 dump_printf (MSG_NOTE
, "constant\n");
7977 case vect_external_def
:
7978 dump_printf (MSG_NOTE
, "external\n");
7980 case vect_internal_def
:
7981 dump_printf (MSG_NOTE
, "internal\n");
7983 case vect_induction_def
:
7984 dump_printf (MSG_NOTE
, "induction\n");
7986 case vect_reduction_def
:
7987 dump_printf (MSG_NOTE
, "reduction\n");
7989 case vect_double_reduction_def
:
7990 dump_printf (MSG_NOTE
, "double reduction\n");
7992 case vect_nested_cycle
:
7993 dump_printf (MSG_NOTE
, "nested cycle\n");
7995 case vect_unknown_def_type
:
7996 dump_printf (MSG_NOTE
, "unknown\n");
8001 if (*dt
== vect_unknown_def_type
8003 && *dt
== vect_double_reduction_def
8004 && gimple_code (stmt
) != GIMPLE_PHI
))
8006 if (dump_enabled_p ())
8007 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8008 "Unsupported pattern.\n");
8012 switch (gimple_code (*def_stmt
))
8015 *def
= gimple_phi_result (*def_stmt
);
8019 *def
= gimple_assign_lhs (*def_stmt
);
8023 *def
= gimple_call_lhs (*def_stmt
);
8028 if (dump_enabled_p ())
8029 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8030 "unsupported defining stmt:\n");
8037 /* Function vect_is_simple_use_1.
8039    Same as vect_is_simple_use but also determines the vector operand
8040 type of OPERAND and stores it to *VECTYPE. If the definition of
8041 OPERAND is vect_uninitialized_def, vect_constant_def or
8042 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8043 is responsible to compute the best suited vector type for the
8047 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
8048 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
8049 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
8051 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
8055 /* Now get a vector type if the def is internal, otherwise supply
8056 NULL_TREE and leave it up to the caller to figure out a proper
8057 type for the use stmt. */
8058 if (*dt
== vect_internal_def
8059 || *dt
== vect_induction_def
8060 || *dt
== vect_reduction_def
8061 || *dt
== vect_double_reduction_def
8062 || *dt
== vect_nested_cycle
)
8064 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
8066 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8067 && !STMT_VINFO_RELEVANT (stmt_info
)
8068 && !STMT_VINFO_LIVE_P (stmt_info
))
8069 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8071 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8072 gcc_assert (*vectype
!= NULL_TREE
);
8074 else if (*dt
== vect_uninitialized_def
8075 || *dt
== vect_constant_def
8076 || *dt
== vect_external_def
)
8077 *vectype
= NULL_TREE
;
8085 /* Function supportable_widening_operation
8087 Check whether an operation represented by the code CODE is a
8088 widening operation that is supported by the target platform in
8089 vector form (i.e., when operating on arguments of type VECTYPE_IN
8090 producing a result of type VECTYPE_OUT).
8092 Widening operations we currently support are NOP (CONVERT), FLOAT
8093 and WIDEN_MULT. This function checks if these operations are supported
8094 by the target platform either directly (via vector tree-codes), or via
8098 - CODE1 and CODE2 are codes of vector operations to be used when
8099 vectorizing the operation, if available.
8100 - MULTI_STEP_CVT determines the number of required intermediate steps in
8101 case of multi-step conversion (like char->short->int - in that case
8102 MULTI_STEP_CVT will be 1).
8103 - INTERM_TYPES contains the intermediate type required to perform the
8104 widening operation (short in the above example). */
8107 supportable_widening_operation (enum tree_code code
, gimple stmt
,
8108 tree vectype_out
, tree vectype_in
,
8109 enum tree_code
*code1
, enum tree_code
*code2
,
8110 int *multi_step_cvt
,
8111 vec
<tree
> *interm_types
)
8113 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8114 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8115 struct loop
*vect_loop
= NULL
;
8116 machine_mode vec_mode
;
8117 enum insn_code icode1
, icode2
;
8118 optab optab1
, optab2
;
8119 tree vectype
= vectype_in
;
8120 tree wide_vectype
= vectype_out
;
8121 enum tree_code c1
, c2
;
8123 tree prev_type
, intermediate_type
;
8124 machine_mode intermediate_mode
, prev_mode
;
8125 optab optab3
, optab4
;
8127 *multi_step_cvt
= 0;
8129 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
8133 case WIDEN_MULT_EXPR
:
8134 /* The result of a vectorized widening operation usually requires
8135 two vectors (because the widened results do not fit into one vector).
8136 The generated vector results would normally be expected to be
8137 generated in the same order as in the original scalar computation,
8138 i.e. if 8 results are generated in each vector iteration, they are
8139 to be organized as follows:
8140 vect1: [res1,res2,res3,res4],
8141 vect2: [res5,res6,res7,res8].
8143 However, in the special case that the result of the widening
8144 operation is used in a reduction computation only, the order doesn't
8145 matter (because when vectorizing a reduction we change the order of
8146 the computation). Some targets can take advantage of this and
8147 generate more efficient code. For example, targets like Altivec,
8148 that support widen_mult using a sequence of {mult_even,mult_odd}
8149 generate the following vectors:
8150 vect1: [res1,res3,res5,res7],
8151 vect2: [res2,res4,res6,res8].
8153 When vectorizing outer-loops, we execute the inner-loop sequentially
8154 (each vectorized inner-loop iteration contributes to VF outer-loop
8155 iterations in parallel). We therefore don't allow to change the
8156 order of the computation in the inner-loop during outer-loop
8158 /* TODO: Another case in which order doesn't *really* matter is when we
8159 widen and then contract again, e.g. (short)((int)x * y >> 8).
8160 Normally, pack_trunc performs an even/odd permute, whereas the
8161 repack from an even/odd expansion would be an interleave, which
8162 would be significantly simpler for e.g. AVX2. */
8163 /* In any case, in order to avoid duplicating the code below, recurse
8164 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8165 are properly set up for the caller. If we fail, we'll continue with
8166 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8168 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
8169 && !nested_in_vect_loop_p (vect_loop
, stmt
)
8170 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
8171 stmt
, vectype_out
, vectype_in
,
8172 code1
, code2
, multi_step_cvt
,
8175 /* Elements in a vector with vect_used_by_reduction property cannot
8176 be reordered if the use chain with this property does not have the
8177 same operation. One such an example is s += a * b, where elements
8178 in a and b cannot be reordered. Here we check if the vector defined
8179 by STMT is only directly used in the reduction statement. */
8180 tree lhs
= gimple_assign_lhs (stmt
);
8181 use_operand_p dummy
;
8183 stmt_vec_info use_stmt_info
= NULL
;
8184 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
8185 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
8186 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
8189 c1
= VEC_WIDEN_MULT_LO_EXPR
;
8190 c2
= VEC_WIDEN_MULT_HI_EXPR
;
8193 case VEC_WIDEN_MULT_EVEN_EXPR
:
8194 /* Support the recursion induced just above. */
8195 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
8196 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
8199 case WIDEN_LSHIFT_EXPR
:
8200 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
8201 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
8205 c1
= VEC_UNPACK_LO_EXPR
;
8206 c2
= VEC_UNPACK_HI_EXPR
;
8210 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
8211 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
8214 case FIX_TRUNC_EXPR
:
8215 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8216 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8217 computing the operation. */
8224 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
8226 enum tree_code ctmp
= c1
;
8231 if (code
== FIX_TRUNC_EXPR
)
8233 /* The signedness is determined from output operand. */
8234 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8235 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
8239 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8240 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
8243 if (!optab1
|| !optab2
)
8246 vec_mode
= TYPE_MODE (vectype
);
8247 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
8248 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
8254 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8255 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
8258 /* Check if it's a multi-step conversion that can be done using intermediate
8261 prev_type
= vectype
;
8262 prev_mode
= vec_mode
;
8264 if (!CONVERT_EXPR_CODE_P (code
))
8267 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8268 intermediate steps in promotion sequence. We try
8269 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
8271 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8272 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8274 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8276 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
8277 TYPE_UNSIGNED (prev_type
));
8278 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8279 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
8281 if (!optab3
|| !optab4
8282 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
8283 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8284 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
8285 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
8286 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
8287 == CODE_FOR_nothing
)
8288 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
8289 == CODE_FOR_nothing
))
8292 interm_types
->quick_push (intermediate_type
);
8293 (*multi_step_cvt
)++;
8295 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8296 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
8299 prev_type
= intermediate_type
;
8300 prev_mode
= intermediate_mode
;
8303 interm_types
->release ();
8308 /* Function supportable_narrowing_operation
8310 Check whether an operation represented by the code CODE is a
8311 narrowing operation that is supported by the target platform in
8312 vector form (i.e., when operating on arguments of type VECTYPE_IN
8313 and producing a result of type VECTYPE_OUT).
8315 Narrowing operations we currently support are NOP (CONVERT) and
8316 FIX_TRUNC. This function checks if these operations are supported by
8317 the target platform directly via vector tree-codes.
8320 - CODE1 is the code of a vector operation to be used when
8321 vectorizing the operation, if available.
8322 - MULTI_STEP_CVT determines the number of required intermediate steps in
8323 case of multi-step conversion (like int->short->char - in that case
8324 MULTI_STEP_CVT will be 1).
8325 - INTERM_TYPES contains the intermediate type required to perform the
8326 narrowing operation (short in the above example). */
8329 supportable_narrowing_operation (enum tree_code code
,
8330 tree vectype_out
, tree vectype_in
,
8331 enum tree_code
*code1
, int *multi_step_cvt
,
8332 vec
<tree
> *interm_types
)
8334 machine_mode vec_mode
;
8335 enum insn_code icode1
;
8336 optab optab1
, interm_optab
;
8337 tree vectype
= vectype_in
;
8338 tree narrow_vectype
= vectype_out
;
8340 tree intermediate_type
;
8341 machine_mode intermediate_mode
, prev_mode
;
8345 *multi_step_cvt
= 0;
8349 c1
= VEC_PACK_TRUNC_EXPR
;
8352 case FIX_TRUNC_EXPR
:
8353 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
8357 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8358 tree code and optabs used for computing the operation. */
8365 if (code
== FIX_TRUNC_EXPR
)
8366 /* The signedness is determined from output operand. */
8367 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8369 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8374 vec_mode
= TYPE_MODE (vectype
);
8375 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
8380 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8383 /* Check if it's a multi-step conversion that can be done using intermediate
8385 prev_mode
= vec_mode
;
8386 if (code
== FIX_TRUNC_EXPR
)
8387 uns
= TYPE_UNSIGNED (vectype_out
);
8389 uns
= TYPE_UNSIGNED (vectype
);
8391 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8392 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8393 costly than signed. */
8394 if (code
== FIX_TRUNC_EXPR
&& uns
)
8396 enum insn_code icode2
;
8399 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
8401 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8402 if (interm_optab
!= unknown_optab
8403 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
8404 && insn_data
[icode1
].operand
[0].mode
8405 == insn_data
[icode2
].operand
[0].mode
)
8408 optab1
= interm_optab
;
8413 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8414 intermediate steps in promotion sequence. We try
8415 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8416 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8417 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8419 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8421 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
8423 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
8426 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
8427 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8428 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
8429 == CODE_FOR_nothing
))
8432 interm_types
->quick_push (intermediate_type
);
8433 (*multi_step_cvt
)++;
8435 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8438 prev_mode
= intermediate_mode
;
8439 optab1
= interm_optab
;
8442 interm_types
->release ();