1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "double-int.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "hard-reg-set.h"
43 #include "dominance.h"
45 #include "basic-block.h"
46 #include "gimple-pretty-print.h"
47 #include "tree-ssa-alias.h"
48 #include "internal-fn.h"
50 #include "gimple-expr.h"
54 #include "gimple-iterator.h"
55 #include "gimplify-me.h"
56 #include "gimple-ssa.h"
58 #include "tree-phinodes.h"
59 #include "ssa-iterators.h"
60 #include "stringpool.h"
61 #include "tree-ssanames.h"
62 #include "tree-ssa-loop-manip.h"
64 #include "tree-ssa-loop.h"
65 #include "tree-scalar-evolution.h"
69 #include "statistics.h"
71 #include "fixed-value.h"
72 #include "insn-config.h"
81 #include "recog.h" /* FIXME: for insn_data */
82 #include "insn-codes.h"
84 #include "diagnostic-core.h"
85 #include "tree-vectorizer.h"
87 #include "plugin-api.h"
92 /* For lang_hooks.types.type_for_mode. */
93 #include "langhooks.h"
95 /* Return the vectorized type for the given statement. */
/* NOTE(review): embedded original line numbers (95, 98, 100) skip values,
   so this chunk is missing interior lines (return type, braces) -- confirm
   against the full file before editing.  Reads STMT_VINFO_VECTYPE only.  */
98 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
100 return STMT_VINFO_VECTYPE (stmt_info
);
103 /* Return TRUE iff the given statement is in an inner loop relative to
104 the loop being vectorized. */
/* NOTE(review): original lines 105, 107, 111-115, 117 are elided from this
   chunk (numbering skips); the missing span presumably guards against a
   NULL loop_vinfo -- TODO confirm against the full file.  */
106 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
108 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
109 basic_block bb
= gimple_bb (stmt
);
110 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
116 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* A stmt is in the inner loop iff its block's loop_father is the
   inner loop of the loop being vectorized.  */
118 return (bb
->loop_father
== loop
->inner
);
121 /* Record the cost of a statement, either by directly informing the
122 target model or by saving it in a vector for later processing.
123 Return a preliminary estimate of the statement's cost. */
/* NOTE(review): lines are elided between original 137 and 142 (numbering
   skips); the missing span presumably contains the branch selecting the
   deferred (BODY_COST_VEC) path vs. the immediate add_stmt_cost path --
   TODO confirm against the full file.  */
126 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
127 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
128 int misalign
, enum vect_cost_model_location where
)
/* Deferred path: push the cost entry onto BODY_COST_VEC and return a
   preliminary estimate from the target hook.  */
132 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
133 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
134 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
137 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
/* Immediate path: pick the target cost data from the loop or the
   basic-block vectorization info and inform the target model directly.  */
142 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
143 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
144 void *target_cost_data
;
147 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
149 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
151 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
156 /* Return a variable of type ELEM_TYPE[NELEMS]. */
/* NOTE(review): the trailing argument(s) of the create_tmp_var call
   (original line ~162, likely the variable name string) are elided from
   this chunk -- confirm against the full file.  */
159 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
161 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
165 /* ARRAY is an array of vectors created by create_vector_array.
166 Return an SSA_NAME for the vector in index N. The reference
167 is part of the vectorization of STMT and the vector is associated
168 with scalar destination SCALAR_DEST. */
/* NOTE(review): interior lines are elided (numbering skips 169-170,
   173, 175-176, 183, 188+); the final `return vect_name;` is not visible
   in this chunk -- confirm against the full file.  */
171 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
172 tree array
, unsigned HOST_WIDE_INT n
)
174 tree vect_type
, vect
, vect_name
, array_ref
;
/* The array must really be an array of vectors.  */
177 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
178 vect_type
= TREE_TYPE (TREE_TYPE (array
));
179 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
/* Build ARRAY[N] as the RHS of the new assignment.  */
180 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
181 build_int_cst (size_type_node
, n
),
182 NULL_TREE
, NULL_TREE
);
184 new_stmt
= gimple_build_assign (vect
, array_ref
);
185 vect_name
= make_ssa_name (vect
, new_stmt
);
186 gimple_assign_set_lhs (new_stmt
, vect_name
);
/* Emit the load at GSI as part of vectorizing STMT.  */
187 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
192 /* ARRAY is an array of vectors created by create_vector_array.
193 Emit code to store SSA_NAME VECT in index N of the array.
194 The store is part of the vectorization of STMT. */
/* NOTE(review): declarations (original ~199-202) and other interior
   lines are elided from this chunk (numbering skips) -- confirm against
   the full file before editing.  */
197 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
198 tree array
, unsigned HOST_WIDE_INT n
)
/* Build ARRAY[N] as the LHS of the new assignment.  */
203 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
204 build_int_cst (size_type_node
, n
),
205 NULL_TREE
, NULL_TREE
);
207 new_stmt
= gimple_build_assign (array_ref
, vect
);
/* Emit the store at GSI as part of vectorizing STMT.  */
208 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
211 /* PTR is a pointer to an array of type TYPE. Return a representation
212 of *PTR. The memory reference replaces those in FIRST_DR
/* NOTE(review): the tail of the comment above and the final
   `return mem_ref;` (original ~213, 224-225) are elided from this
   chunk -- confirm against the full file.  */
216 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
218 tree mem_ref
, alias_ptr_type
;
/* Take the alias set from the original scalar data reference.  */
220 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
221 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
222 /* Arrays have the same alignment as their type. */
223 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
227 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
229 /* Function vect_mark_relevant.
231 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* NOTE(review): many interior lines are elided from this chunk (the
   embedded original numbering skips, e.g. 252-253, 255, 257-259, 283-291,
   307-310, 321-323); missing spans include loop bodies, early returns and
   closing braces -- confirm against the full file before editing.  */
234 vect_mark_relevant (vec
<gimple
> *worklist
, gimple stmt
,
235 enum vect_relevant relevant
, bool live_p
,
236 bool used_in_pattern
)
238 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Remember the incoming marks so we can tell below whether anything
   actually changed.  */
239 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
240 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE
, vect_location
,
245 "mark relevant %d, live %d.\n", relevant
, live_p
);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern, in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
254 if (!used_in_pattern
)
256 imm_use_iterator imm_iter
;
260 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
261 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* The LHS comes from either an assignment or a call.  */
263 if (is_gimple_assign (stmt
))
264 lhs
= gimple_assign_lhs (stmt
);
266 lhs
= gimple_call_lhs (stmt
);
268 /* This use is out of pattern use, if LHS has other uses that are
269 pattern uses, we should mark the stmt itself, and not the pattern
271 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
272 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
/* Debug uses do not count as real uses.  */
274 if (is_gimple_debug (USE_STMT (use_p
)))
276 use_stmt
= USE_STMT (use_p
);
/* Uses outside the loop do not count either.  */
278 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
281 if (vinfo_for_stmt (use_stmt
)
282 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
292 /* This is the last stmt in a sequence that was detected as a
293 pattern that can potentially be vectorized. Don't mark the stmt
294 as relevant/live because it's not going to be vectorized.
295 Instead mark the pattern-stmt that replaces it. */
297 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
299 if (dump_enabled_p ())
300 dump_printf_loc (MSG_NOTE
, vect_location
,
301 "last stmt in pattern. don't mark"
302 " relevant/live.\n");
/* Switch to the pattern stmt's info and re-snapshot its marks.  */
303 stmt_info
= vinfo_for_stmt (pattern_stmt
);
304 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
305 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
306 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
/* Merge the new liveness/relevance into the stmt info; relevance only
   ever increases.  */
311 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
312 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
313 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
/* If nothing changed there is no need to revisit the stmt.  */
315 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
316 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE
, vect_location
,
320 "already marked relevant/live.\n");
324 worklist
->safe_push (stmt
);
328 /* Function vect_stmt_relevant_p.
330 Return true if STMT in loop that is represented by LOOP_VINFO is
331 "relevant for vectorization".
333 A stmt is considered "relevant for vectorization" if:
334 - it has uses outside the loop.
335 - it has vdefs (it alters memory).
336 - control stmts in the loop (except for the exit condition).
338 CHECKME: what other side effects would the vectorizer allow? */
/* NOTE(review): interior lines are elided (numbering skips 343, 345,
   347-349, 351-352, 368-369, 381, 383-384, 389-394); missing spans
   presumably include declarations, `*live_p = false;`-style
   initialization and the `*live_p = true;` assignment for out-of-loop
   uses -- confirm against the full file.  */
341 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
342 enum vect_relevant
*relevant
, bool *live_p
)
344 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
346 imm_use_iterator imm_iter
;
/* Default: not relevant within the scope.  */
350 *relevant
= vect_unused_in_scope
;
353 /* cond stmt other than loop exit cond. */
354 if (is_ctrl_stmt (stmt
)
355 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
356 != loop_exit_ctrl_vec_info_type
)
357 *relevant
= vect_used_in_scope
;
359 /* changing memory. */
360 if (gimple_code (stmt
) != GIMPLE_PHI
)
361 if (gimple_vdef (stmt
)
362 && !gimple_clobber_p (stmt
))
364 if (dump_enabled_p ())
365 dump_printf_loc (MSG_NOTE
, vect_location
,
366 "vec_stmt_relevant_p: stmt has vdefs.\n");
367 *relevant
= vect_used_in_scope
;
370 /* uses outside the loop. */
371 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
373 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
375 basic_block bb
= gimple_bb (USE_STMT (use_p
));
376 if (!flow_bb_inside_loop_p (loop
, bb
))
378 if (dump_enabled_p ())
379 dump_printf_loc (MSG_NOTE
, vect_location
,
380 "vec_stmt_relevant_p: used out of loop.\n");
/* Debug uses are not real out-of-loop uses.  */
382 if (is_gimple_debug (USE_STMT (use_p
)))
385 /* We expect all such uses to be in the loop exit phis
386 (because of loop closed form) */
387 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
388 gcc_assert (bb
== single_exit (loop
)->dest
);
395 return (*live_p
|| *relevant
);
399 /* Function exist_non_indexing_operands_for_use_p
401 USE is one of the uses attached to STMT. Check if USE is
402 used in STMT for anything other than indexing an array. */
/* NOTE(review): many interior lines are elided (numbering skips 414-415,
   417-419, 421, 424-425, 428, 430, 434-435, 437-450, 452, 455-464); the
   missing spans include return statements and switch-case labels -- the
   switch below over gimple_call_internal_fn presumably selects MASK_STORE
   (arg 3) vs MASK_LOAD (arg 2) operands -- TODO confirm against the full
   file.  */
405 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
408 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
410 /* USE corresponds to some operand in STMT. If there is no data
411 reference in STMT, then any operand that corresponds to USE
412 is not indexing an array. */
413 if (!STMT_VINFO_DATA_REF (stmt_info
))
416 /* STMT has a data_ref. FORNOW this means that its of one of
420 (This should have been verified in analyze_data_refs).
422 'var' in the second case corresponds to a def, not a use,
423 so USE cannot correspond to any operands that are not used
426 Therefore, all we need to check is if STMT falls into the
427 first case, and whether var corresponds to USE. */
429 if (!gimple_assign_copy_p (stmt
))
431 if (is_gimple_call (stmt
)
432 && gimple_call_internal_p (stmt
))
433 switch (gimple_call_internal_fn (stmt
))
436 operand
= gimple_call_arg (stmt
, 3);
441 operand
= gimple_call_arg (stmt
, 2);
451 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
453 operand
= gimple_assign_rhs1 (stmt
);
454 if (TREE_CODE (operand
) != SSA_NAME
)
465 Function process_use.
468 - a USE in STMT in a loop represented by LOOP_VINFO
469 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
470 that defined USE. This is done by calling mark_relevant and passing it
471 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
472 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
476 Generally, LIVE_P and RELEVANT are used to define the liveness and
477 relevance info of the DEF_STMT of this USE:
478 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
479 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
481 - case 1: If USE is used only for address computations (e.g. array indexing),
482 which does not need to be directly vectorized, then the liveness/relevance
483 of the respective DEF_STMT is left unchanged.
484 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
485 skip DEF_STMT cause it had already been processed.
486 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
487 be modified accordingly.
489 Return true if everything is as expected. Return false otherwise. */
/* NOTE(review): heavily elided (numbering skips e.g. 494-495, 500-501,
   503, 507-508, 510, 514-516, 518-519, 522, 525-527, 549-551, 554-558,
   564-566, 570-571, 575-576, 580-589, 592-594, 596, 598, 602-604,
   609-610, 613-614, 617-623, 626-629); missing spans include `return
   true/false;` statements, `switch (relevant)` headers, `break;`s and
   closing braces -- confirm against the full file before editing.  */
492 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
493 enum vect_relevant relevant
, vec
<gimple
> *worklist
,
496 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
497 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
498 stmt_vec_info dstmt_vinfo
;
499 basic_block bb
, def_bb
;
502 enum vect_def_type dt
;
504 /* case 1: we are only interested in uses that need to be vectorized. Uses
505 that are used for address computation are not considered relevant. */
506 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
/* Classify the definition of USE; failure means the use is not
   vectorizable.  */
509 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
513 "not vectorized: unsupported use in stmt.\n");
517 if (!def_stmt
|| gimple_nop_p (def_stmt
))
/* Defs outside the loop need no marking.  */
520 def_bb
= gimple_bb (def_stmt
);
521 if (!flow_bb_inside_loop_p (loop
, def_bb
))
523 if (dump_enabled_p ())
524 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
528 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
529 DEF_STMT must have already been processed, because this should be the
530 only way that STMT, which is a reduction-phi, was put in the worklist,
531 as there should be no other uses for DEF_STMT in the loop. So we just
532 check that everything is as expected, and we are done. */
533 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
534 bb
= gimple_bb (stmt
);
535 if (gimple_code (stmt
) == GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
537 && gimple_code (def_stmt
) != GIMPLE_PHI
538 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
539 && bb
->loop_father
== def_bb
->loop_father
)
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE
, vect_location
,
543 "reduc-stmt defining reduc-phi in the same nest.\n")
/* If DEF_STMT was replaced by a pattern stmt, check the pattern
   stmt's info instead.  */;
544 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
545 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
546 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
547 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
548 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
552 /* case 3a: outer-loop stmt defining an inner-loop stmt:
553 outer-loop-header-bb:
/* Adjust RELEVANT for a def in the outer loop used in the inner loop.  */
559 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
561 if (dump_enabled_p ())
562 dump_printf_loc (MSG_NOTE
, vect_location
,
563 "outer-loop def-stmt defining inner-loop stmt.\n");
567 case vect_unused_in_scope
:
568 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
569 vect_used_in_scope
: vect_unused_in_scope
;
572 case vect_used_in_outer_by_reduction
:
573 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
574 relevant
= vect_used_by_reduction
;
577 case vect_used_in_outer
:
578 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
579 relevant
= vect_used_in_scope
;
582 case vect_used_in_scope
:
590 /* case 3b: inner-loop stmt defining an outer-loop stmt:
591 outer-loop-header-bb:
595 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
/* Adjust RELEVANT for a def in the inner loop used in the outer loop.  */
597 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_NOTE
, vect_location
,
601 "inner-loop def-stmt defining outer-loop stmt.\n");
605 case vect_unused_in_scope
:
606 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
607 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
608 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
611 case vect_used_by_reduction
:
612 relevant
= vect_used_in_outer_by_reduction
;
615 case vect_used_in_scope
:
616 relevant
= vect_used_in_outer
;
/* Finally mark DEF_STMT with the (possibly adjusted) relevance.  */
624 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
625 is_pattern_stmt_p (stmt_vinfo
));
630 /* Function vect_mark_stmts_to_be_vectorized.
632 Not all stmts in the loop need to be vectorized. For example:
641 Stmt 1 and 3 do not need to be vectorized, because loop control and
642 addressing of vectorized data-refs are handled differently.
644 This pass detects such stmts. */
/* NOTE(review): heavily elided (the embedded original numbering skips many
   values, e.g. 648, 653-654, 656-658, 661, 665, 667, 670-671, 673-674,
   676, 679-680, 683, 685, 688, 691-692, 695-697, 700-703, 706, 709-710,
   717, 722, 725, 732, 735-736, 739, 742-743, 746-749, 753-758, 763,
   767-773, 777, 781-791, 793, 798, 801-802, 804, 809-811, 813, 817-820,
   822, 824, 827-832, 834, 837-840, 842-843, 845, 847-849, 851-855);
   missing spans include `return false;`/`return true;`, switch headers,
   `break;`s and closing braces -- confirm against the full file before
   editing.  */
647 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
649 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
650 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
651 unsigned int nbbs
= loop
->num_nodes
;
652 gimple_stmt_iterator si
;
655 stmt_vec_info stmt_vinfo
;
659 enum vect_relevant relevant
, tmp_relevant
;
660 enum vect_def_type def_type
;
662 if (dump_enabled_p ())
663 dump_printf_loc (MSG_NOTE
, vect_location
,
664 "=== vect_mark_stmts_to_be_vectorized ===\n");
666 auto_vec
<gimple
, 64> worklist
;
668 /* 1. Init worklist. */
669 for (i
= 0; i
< nbbs
; i
++)
/* Seed the worklist with every relevant phi and stmt in the loop.  */
672 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
675 if (dump_enabled_p ())
677 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
678 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
681 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
682 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
684 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
686 stmt
= gsi_stmt (si
);
687 if (dump_enabled_p ())
689 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
690 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
693 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
694 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
698 /* 2. Process_worklist */
699 while (worklist
.length () > 0)
704 stmt
= worklist
.pop ();
705 if (dump_enabled_p ())
707 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
708 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
711 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
712 (DEF_STMT) as relevant/irrelevant and live/dead according to the
713 liveness and relevance properties of STMT. */
714 stmt_vinfo
= vinfo_for_stmt (stmt
);
715 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
716 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
718 /* Generally, the liveness and relevance properties of STMT are
719 propagated as is to the DEF_STMTs of its USEs:
720 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
721 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
723 One exception is when STMT has been identified as defining a reduction
724 variable; in this case we set the liveness/relevance as follows:
726 relevant = vect_used_by_reduction
727 This is because we distinguish between two kinds of relevant stmts -
728 those that are used by a reduction computation, and those that are
729 (also) used by a regular computation. This allows us later on to
730 identify stmts that are used solely by a reduction, and therefore the
731 order of the results that they produce does not have to be kept. */
733 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
734 tmp_relevant
= relevant
;
737 case vect_reduction_def
:
738 switch (tmp_relevant
)
740 case vect_unused_in_scope
:
741 relevant
= vect_used_by_reduction
;
744 case vect_used_by_reduction
:
745 if (gimple_code (stmt
) == GIMPLE_PHI
)
750 if (dump_enabled_p ())
751 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
752 "unsupported use of reduction.\n");
759 case vect_nested_cycle
:
760 if (tmp_relevant
!= vect_unused_in_scope
761 && tmp_relevant
!= vect_used_in_outer_by_reduction
762 && tmp_relevant
!= vect_used_in_outer
)
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
766 "unsupported use of nested cycle.\n");
774 case vect_double_reduction_def
:
775 if (tmp_relevant
!= vect_unused_in_scope
776 && tmp_relevant
!= vect_used_by_reduction
)
778 if (dump_enabled_p ())
779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
780 "unsupported use of double reduction.\n");
792 if (is_pattern_stmt_p (stmt_vinfo
))
794 /* Pattern statements are not inserted into the code, so
795 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
796 have to scan the RHS or function arguments instead. */
797 if (is_gimple_assign (stmt
))
799 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
800 tree op
= gimple_assign_rhs1 (stmt
);
/* For COND_EXPR with an embedded comparison, process the two
   comparison operands individually.  */
803 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
805 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
806 live_p
, relevant
, &worklist
, false)
807 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
808 live_p
, relevant
, &worklist
, false))
812 for (; i
< gimple_num_ops (stmt
); i
++)
814 op
= gimple_op (stmt
, i
);
815 if (TREE_CODE (op
) == SSA_NAME
816 && !process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
821 else if (is_gimple_call (stmt
))
823 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
825 tree arg
= gimple_call_arg (stmt
, i
);
826 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
/* Non-pattern stmts: walk the SSA uses directly.  */
833 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
835 tree op
= USE_FROM_PTR (use_p
);
836 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
/* Gather loads also use their offset operand.  */
841 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
844 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
846 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
850 } /* while worklist */
856 /* Function vect_model_simple_cost.
858 Models cost for simple operations, i.e. those that only emit ncopies of a
859 single op. Right now, this does not account for multiple insns that could
860 be generated for the single vector op. We will handle that shortly. */
/* NOTE(review): interior lines are elided (numbering skips 861-862,
   867-868, 870, 873-874, 880, 884, 889-891); the early `return;` for
   PURE_SLP_STMT and closing braces are not visible -- confirm against
   the full file.  */
863 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
864 enum vect_def_type
*dt
,
865 stmt_vector_for_cost
*prologue_cost_vec
,
866 stmt_vector_for_cost
*body_cost_vec
)
869 int inside_cost
= 0, prologue_cost
= 0;
871 /* The SLP costs were already calculated during SLP tree build. */
872 if (PURE_SLP_STMT (stmt_info
))
875 /* FORNOW: Assuming maximum 2 args per stmts. */
876 for (i
= 0; i
< 2; i
++)
877 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
878 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
879 stmt_info
, 0, vect_prologue
);
881 /* Pass the inside-of-loop statements to the target-specific cost model. */
882 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
883 stmt_info
, 0, vect_body
);
885 if (dump_enabled_p ())
886 dump_printf_loc (MSG_NOTE
, vect_location
,
887 "vect_model_simple_cost: inside_cost = %d, "
888 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
892 /* Model cost for type demotion and promotion operations. PWR is normally
893 zero for single-step promotions and demotions. It will be one if
894 two-step promotion/demotion is required, and so on. Each additional
895 step doubles the number of instructions required. */
/* NOTE(review): interior lines are elided (numbering skips 896-897,
   900-901, 906, 909-911, 913, 915, 917, 919, 922-924, 930, 935-936);
   missing spans include the early return, the if/else around the
   target_cost_data selection, the `tmp` expression tail and closing
   braces -- confirm against the full file.  */
898 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
899 enum vect_def_type
*dt
, int pwr
)
902 int inside_cost
= 0, prologue_cost
= 0;
903 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
904 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
905 void *target_cost_data
;
907 /* The SLP costs were already calculated during SLP tree build. */
908 if (PURE_SLP_STMT (stmt_info
))
/* Pick the cost data from loop vectorization info, else from the
   basic-block vectorization info.  */
912 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
914 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
/* Each additional promotion/demotion step doubles the instruction
   count, hence vect_pow2 below.  */
916 for (i
= 0; i
< pwr
+ 1; i
++)
918 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
920 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
921 vec_promote_demote
, stmt_info
, 0,
925 /* FORNOW: Assuming maximum 2 args per stmts. */
926 for (i
= 0; i
< 2; i
++)
927 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
928 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
929 stmt_info
, 0, vect_prologue
);
931 if (dump_enabled_p ())
932 dump_printf_loc (MSG_NOTE
, vect_location
,
933 "vect_model_promotion_demotion_cost: inside_cost = %d, "
934 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
937 /* Function vect_cost_group_size
939 For grouped load or store, return the group_size only if it is the first
940 load or store of a group, else return 1. This ensures that group size is
941 only returned once per group. */
/* NOTE(review): the `return 1;` fallthrough (original ~951) is elided
   from this chunk -- confirm against the full file.  */
944 vect_cost_group_size (stmt_vec_info stmt_info
)
946 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
/* Only the first stmt of the group reports the full group size.  */
948 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
949 return GROUP_SIZE (stmt_info
);
955 /* Function vect_model_store_cost
957 Models cost for stores. In the case of grouped accesses, one access
958 has the overhead of the grouped access attributed to it. */
/* NOTE(review): interior lines are elided (numbering skips 959-960, 963,
   966-967, 970-971, 975, 978-980, 982-985, 988-989, 991, 993-995,
   997-998, 1004, 1006, 1010, 1014-1016, 1019, 1028-1029, 1031,
   1036-1038); missing spans include the slp_node branch, the `else`
   arms and closing braces -- confirm against the full file before
   editing.  */
961 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
962 bool store_lanes_p
, enum vect_def_type dt
,
964 stmt_vector_for_cost
*prologue_cost_vec
,
965 stmt_vector_for_cost
*body_cost_vec
)
968 unsigned int inside_cost
= 0, prologue_cost
= 0;
969 struct data_reference
*first_dr
;
/* The stored value may need to be built from a scalar in the prologue.  */
972 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
973 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
974 stmt_info
, 0, vect_prologue
);
976 /* Grouped access? */
977 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
981 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
986 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
987 group_size
= vect_cost_group_size (stmt_info
);
990 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
992 /* Not a grouped access. */
996 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
999 /* We assume that the cost of a single store-lanes instruction is
1000 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
1001 access is instead being provided by a permute-and-store operation,
1002 include the cost of the permutes. */
1003 if (!store_lanes_p
&& group_size
> 1)
1005 /* Uses a high and low interleave or shuffle operations for each
1007 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1008 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1009 stmt_info
, 0, vect_body
);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE
, vect_location
,
1013 "vect_model_store_cost: strided group_size = %d .\n",
1017 /* Costs of the stores. */
1018 if (STMT_VINFO_STRIDED_P (stmt_info
))
1020 /* N scalar stores plus extracting the elements. */
1021 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1022 inside_cost
+= record_stmt_cost (body_cost_vec
,
1023 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1024 scalar_store
, stmt_info
, 0, vect_body
);
1025 inside_cost
+= record_stmt_cost (body_cost_vec
,
1026 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1027 vec_to_scalar
, stmt_info
, 0, vect_body
);
/* Non-strided: delegate the per-access cost to the helper.  */
1030 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
1032 if (dump_enabled_p ())
1033 dump_printf_loc (MSG_NOTE
, vect_location
,
1034 "vect_model_store_cost: inside_cost = %d, "
1035 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1039 /* Calculate cost of DR's memory access. */
/* NOTE(review): interior lines are elided (numbering skips 1040, 1044,
   1048, 1050-1052, 1055-1056, 1060-1062, 1064, 1072-1075, 1077, 1079,
   1083-1091); missing spans include case labels (the first case is
   presumably dr_aligned), `break;`s and the default/gcc_unreachable
   arm -- confirm against the full file before editing.  */
1041 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
1042 unsigned int *inside_cost
,
1043 stmt_vector_for_cost
*body_cost_vec
)
1045 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1046 gimple stmt
= DR_STMT (dr
);
1047 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Cost depends on how the target supports this DR's alignment.  */
1049 switch (alignment_support_scheme
)
1053 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1054 vector_store
, stmt_info
, 0,
1057 if (dump_enabled_p ())
1058 dump_printf_loc (MSG_NOTE
, vect_location
,
1059 "vect_model_store_cost: aligned.\n");
1063 case dr_unaligned_supported
:
1065 /* Here, we assign an additional cost for the unaligned store. */
1066 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1067 unaligned_store
, stmt_info
,
1068 DR_MISALIGNMENT (dr
), vect_body
);
1069 if (dump_enabled_p ())
1070 dump_printf_loc (MSG_NOTE
, vect_location
,
1071 "vect_model_store_cost: unaligned supported by "
1076 case dr_unaligned_unsupported
:
/* Unsupported access: force the cost high to veto vectorization.  */
1078 *inside_cost
= VECT_MAX_COST
;
1080 if (dump_enabled_p ())
1081 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1082 "vect_model_store_cost: unsupported access.\n");
1092 /* Function vect_model_load_cost
1094 Models cost for loads. In the case of grouped accesses, the last access
1095 has the overhead of the grouped access attributed to it. Since unaligned
1096 accesses are supported for loads, we also account for the costs of the
1097 access scheme chosen. */
/* NOTE(review): interior lines are elided (numbering skips 1098-1099,
   1104-1106, 1109, 1113, 1116, 1118-1123, 1130, 1136, 1140-1142, 1146,
   1152-1153, 1162, 1167-1169); missing spans include the `else` arms,
   declarations and closing braces -- confirm against the full file
   before editing.  */
1100 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1101 bool load_lanes_p
, slp_tree slp_node
,
1102 stmt_vector_for_cost
*prologue_cost_vec
,
1103 stmt_vector_for_cost
*body_cost_vec
)
1107 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1108 unsigned int inside_cost
= 0, prologue_cost
= 0;
1110 /* Grouped accesses? */
1111 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1112 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1114 group_size
= vect_cost_group_size (stmt_info
);
1115 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1117 /* Not a grouped access. */
1124 /* We assume that the cost of a single load-lanes instruction is
1125 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1126 access is instead being provided by a load-and-permute operation,
1127 include the cost of the permutes. */
1128 if (!load_lanes_p
&& group_size
> 1
1129 && !STMT_VINFO_STRIDED_P (stmt_info
))
1131 /* Uses an even and odd extract operations or shuffle operations
1132 for each needed permute. */
1133 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1134 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1135 stmt_info
, 0, vect_body
);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE
, vect_location
,
1139 "vect_model_load_cost: strided group_size = %d .\n",
1143 /* The loads themselves. */
1144 if (STMT_VINFO_STRIDED_P (stmt_info
)
1145 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1147 /* N scalar loads plus gathering them into a vector. */
1148 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1149 inside_cost
+= record_stmt_cost (body_cost_vec
,
1150 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1151 scalar_load
, stmt_info
, 0, vect_body
);
/* Non-strided: delegate the per-access cost to the helper; realign
   cost is charged once per group (see the boolean argument).  */
1154 vect_get_load_cost (first_dr
, ncopies
,
1155 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1156 || group_size
> 1 || slp_node
),
1157 &inside_cost
, &prologue_cost
,
1158 prologue_cost_vec
, body_cost_vec
, true);
1159 if (STMT_VINFO_STRIDED_P (stmt_info
))
1160 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1161 stmt_info
, 0, vect_body
);
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE
, vect_location
,
1165 "vect_model_load_cost: inside_cost = %d, "
1166 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1170 /* Calculate cost of DR's memory access. */
/* NOTE(review): this extraction appears to have dropped lines (braces, the
   return type, the dr_aligned case label, string continuations); tokens
   below are kept verbatim.  The function accumulates *INSIDE_COST and
   *PROLOGUE_COST for loading through data reference DR, NCOPIES times,
   dispatching on the alignment scheme the target supports for DR.  */
1172 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1173 bool add_realign_cost
, unsigned int *inside_cost
,
1174 unsigned int *prologue_cost
,
1175 stmt_vector_for_cost
*prologue_cost_vec
,
1176 stmt_vector_for_cost
*body_cost_vec
,
1177 bool record_prologue_costs
)
1179 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1180 gimple stmt
= DR_STMT (dr
);
1181 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1183 switch (alignment_support_scheme
)
/* Aligned case: one vector_load per copy, no extra penalty.  */
1187 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1188 stmt_info
, 0, vect_body
);
1190 if (dump_enabled_p ())
1191 dump_printf_loc (MSG_NOTE
, vect_location
,
1192 "vect_model_load_cost: aligned.\n");
/* Hardware-supported unaligned load: charge unaligned_load with the
   recorded misalignment so the target cost hook can scale the penalty.  */
1196 case dr_unaligned_supported
:
1198 /* Here, we assign an additional cost for the unaligned load. */
1199 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1200 unaligned_load
, stmt_info
,
1201 DR_MISALIGNMENT (dr
), vect_body
);
1203 if (dump_enabled_p ())
1204 dump_printf_loc (MSG_NOTE
, vect_location
,
1205 "vect_model_load_cost: unaligned supported by "
/* Explicit realignment: two loads plus a permute per copy, and possibly
   one extra statement if the target needs a mask-for-load builtin.  */
1210 case dr_explicit_realign
:
1212 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1213 vector_load
, stmt_info
, 0, vect_body
);
1214 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1215 vec_perm
, stmt_info
, 0, vect_body
);
1217 /* FIXME: If the misalignment remains fixed across the iterations of
1218 the containing loop, the following cost should be added to the
1220 if (targetm
.vectorize
.builtin_mask_for_load
)
1221 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1222 stmt_info
, 0, vect_body
);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE
, vect_location
,
1226 "vect_model_load_cost: explicit realign\n");
1230 case dr_explicit_realign_optimized
:
1232 if (dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE
, vect_location
,
1234 "vect_model_load_cost: unaligned software "
1237 /* Unaligned software pipeline has a load of an address, an initial
1238 load, and possibly a mask operation to "prime" the loop. However,
1239 if this is an access in a group of loads, which provide grouped
1240 access, then the above cost should only be considered for one
1241 access in the group. Inside the loop, there is a load op
1242 and a realignment op. */
/* One-time setup goes into the prologue, and only if the caller asked
   for it (add_realign_cost) and prologue costing is enabled.  */
1244 if (add_realign_cost
&& record_prologue_costs
)
1246 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1247 vector_stmt
, stmt_info
,
1249 if (targetm
.vectorize
.builtin_mask_for_load
)
1250 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1251 vector_stmt
, stmt_info
,
1255 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1256 stmt_info
, 0, vect_body
);
1257 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1258 stmt_info
, 0, vect_body
);
1260 if (dump_enabled_p ())
1261 dump_printf_loc (MSG_NOTE
, vect_location
,
1262 "vect_model_load_cost: explicit realign optimized"
/* Unsupported misalignment: make the cost prohibitive so this
   vectorization strategy is rejected.  */
1268 case dr_unaligned_unsupported
:
1270 *inside_cost
= VECT_MAX_COST
;
1272 if (dump_enabled_p ())
1273 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1274 "vect_model_load_cost: unsupported access.\n");
1283 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1284 the loop preheader for the vectorized stmt STMT. */
/* NOTE(review): extraction gaps here (e.g. the branch that inserts at *GSI
   when GSI is non-null, and several braces) — tokens kept verbatim.
   When no iterator is given: for a loop vectorization the init stmt goes
   on the loop preheader edge; for basic-block vectorization it goes right
   after the labels of the BB being vectorized.  */
1287 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
1290 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1293 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1294 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1298 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1302 if (nested_in_vect_loop_p (loop
, stmt
))
/* Insert on the preheader edge; the edge must not need splitting
   (asserted by !new_bb below).  */
1305 pe
= loop_preheader_edge (loop
);
1306 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1307 gcc_assert (!new_bb
);
/* Basic-block (SLP) vectorization: insert after the BB's labels.  */
1311 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1313 gimple_stmt_iterator gsi_bb_start
;
1315 gcc_assert (bb_vinfo
);
1316 bb
= BB_VINFO_BB (bb_vinfo
);
1317 gsi_bb_start
= gsi_after_labels (bb
);
1318 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1322 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_NOTE
, vect_location
,
1325 "created new init_stmt: ");
1326 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1330 /* Function vect_init_vector.
1332 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1333 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1334 vector type a vector with all elements equal to VAL is created first.
1335 Place the initialization at BSI if it is not NULL. Otherwise, place the
1336 initialization at the loop preheader.
1337 Return the DEF of INIT_STMT.
1338 It will be used in the vectorization of STMT. */
/* NOTE(review): extraction gaps here (return statement, some braces)
   — tokens kept verbatim.  */
1341 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
/* If a vector of TYPE is wanted but VAL is scalar, first coerce VAL to
   the vector's element type, then splat it.  */
1348 if (TREE_CODE (type
) == VECTOR_TYPE
1349 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1351 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
/* Constants can be converted by folding a VIEW_CONVERT_EXPR;
   non-constants need an emitted NOP_EXPR assignment instead.  */
1353 if (CONSTANT_CLASS_P (val
))
1354 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
1357 new_temp
= make_ssa_name (TREE_TYPE (type
));
1358 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1359 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
/* Build the {val,val,...,val} vector constructor.  */
1363 val
= build_vector_from_val (type
, val
);
1366 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1367 init_stmt
= gimple_build_assign (new_var
, val
);
1368 new_temp
= make_ssa_name (new_var
, init_stmt
);
1369 gimple_assign_set_lhs (init_stmt
, new_temp
);
1370 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1371 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1376 /* Function vect_get_vec_def_for_operand.
1378 OP is an operand in STMT. This function returns a (vector) def that will be
1379 used in the vectorized stmt for STMT.
1381 In the case that OP is an SSA_NAME which is defined in the loop, then
1382 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1384 In case OP is an invariant or constant, a new stmt that creates a vector def
1385 needs to be introduced. */
/* NOTE(review): extraction gaps here (the switch head on dt, several
   braces, some dump branches) — tokens kept verbatim.  */
1388 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1393 stmt_vec_info def_stmt_info
= NULL
;
1394 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1395 unsigned int nunits
;
1396 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1398 enum vect_def_type dt
;
1402 if (dump_enabled_p ())
1404 dump_printf_loc (MSG_NOTE
, vect_location
,
1405 "vect_get_vec_def_for_operand: ");
1406 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1407 dump_printf (MSG_NOTE
, "\n");
/* Classify OP (constant / external / internal / reduction / induction);
   the classification drives the case dispatch below.  */
1410 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1411 &def_stmt
, &def
, &dt
);
1412 gcc_assert (is_simple_use
);
1413 if (dump_enabled_p ())
1415 int loc_printed
= 0;
1418 dump_printf_loc (MSG_NOTE
, vect_location
, "def = ");
1420 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, def
);
1421 dump_printf (MSG_NOTE
, "\n");
1426 dump_printf (MSG_NOTE
, " def_stmt = ");
1428 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1429 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1435 /* Case 1: operand is a constant. */
1436 case vect_constant_def
:
1438 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1439 gcc_assert (vector_type
);
1440 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1445 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1446 if (dump_enabled_p ())
1447 dump_printf_loc (MSG_NOTE
, vect_location
,
1448 "Create vector_cst. nunits = %d\n", nunits
);
1450 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1453 /* Case 2: operand is defined outside the loop - loop invariant. */
1454 case vect_external_def
:
1456 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1457 gcc_assert (vector_type
);
1462 /* Create 'vec_inv = {inv,inv,..,inv}' */
1463 if (dump_enabled_p ())
1464 dump_printf_loc (MSG_NOTE
, vect_location
, "Create vector_inv.\n");
1466 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1469 /* Case 3: operand is defined inside the loop. */
1470 case vect_internal_def
:
1473 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1475 /* Get the def from the vectorized stmt. */
1476 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1478 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1479 /* Get vectorized pattern statement. */
1481 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1482 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1483 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1484 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1485 gcc_assert (vec_stmt
);
/* The vectorized def can come from a PHI, a call, or an assignment;
   pick the LHS accordingly.  */
1486 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1487 vec_oprnd
= PHI_RESULT (vec_stmt
);
1488 else if (is_gimple_call (vec_stmt
))
1489 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1491 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1495 /* Case 4: operand is defined by a loop header phi - reduction */
1496 case vect_reduction_def
:
1497 case vect_double_reduction_def
:
1498 case vect_nested_cycle
:
1502 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1503 loop
= (gimple_bb (def_stmt
))->loop_father
;
1505 /* Get the def before the loop */
1506 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1507 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1510 /* Case 5: operand is defined by loop-header phi - induction. */
1511 case vect_induction_def
:
1513 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1515 /* Get the def from the vectorized stmt. */
1516 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1517 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1518 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1519 vec_oprnd
= PHI_RESULT (vec_stmt
);
1521 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1531 /* Function vect_get_vec_def_for_stmt_copy
1533 Return a vector-def for an operand. This function is used when the
1534 vectorized stmt to be created (by the caller to this function) is a "copy"
1535 created in case the vectorized result cannot fit in one vector, and several
1536 copies of the vector-stmt are required. In this case the vector-def is
1537 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1538 of the stmt that defines VEC_OPRND.
1539 DT is the type of the vector def VEC_OPRND.
1542 In case the vectorization factor (VF) is bigger than the number
1543 of elements that can fit in a vectype (nunits), we have to generate
1544 more than one vector stmt to vectorize the scalar stmt. This situation
1545 arises when there are multiple data-types operated upon in the loop; the
1546 smallest data-type determines the VF, and as a result, when vectorizing
1547 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1548 vector stmt (each computing a vector of 'nunits' results, and together
1549 computing 'VF' results in each iteration). This function is called when
1550 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1551 which VF=16 and nunits=4, so the number of copies required is 4):
1553 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1555 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1556 VS1.1: vx.1 = memref1 VS1.2
1557 VS1.2: vx.2 = memref2 VS1.3
1558 VS1.3: vx.3 = memref3
1560 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1561 VSnew.1: vz1 = vx.1 + ... VSnew.2
1562 VSnew.2: vz2 = vx.2 + ... VSnew.3
1563 VSnew.3: vz3 = vx.3 + ...
1565 The vectorization of S1 is explained in vectorizable_load.
1566 The vectorization of S2:
1567 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1568 the function 'vect_get_vec_def_for_operand' is called to
1569 get the relevant vector-def for each operand of S2. For operand x it
1570 returns the vector-def 'vx.0'.
1572 To create the remaining copies of the vector-stmt (VSnew.j), this
1573 function is called to get the relevant vector-def for each operand. It is
1574 obtained from the respective VS1.j stmt, which is recorded in the
1575 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1577 For example, to obtain the vector-def 'vx.1' in order to create the
1578 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1579 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1580 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1581 and return its def ('vx.1').
1582 Overall, to create the above sequence this function will be called 3 times:
1583 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1584 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1585 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
/* NOTE(review): extraction gaps here (return statements, braces)
   — tokens kept verbatim.  */
1588 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1590 gimple vec_stmt_for_operand
;
1591 stmt_vec_info def_stmt_info
;
1593 /* Do nothing; can reuse same def. */
/* Invariants and constants are the same for every copy.  */
1594 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
/* Hop from the stmt defining VEC_OPRND to its "next copy" via
   STMT_VINFO_RELATED_STMT and return that copy's def.  */
1597 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1598 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1599 gcc_assert (def_stmt_info
);
1600 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1601 gcc_assert (vec_stmt_for_operand
);
1602 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1603 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1604 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1606 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1611 /* Get vectorized definitions for the operands to create a copy of an original
1612 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
/* Advances each single-entry operand vector in place: pops the previous
   copy's def and pushes the next copy's def.  VEC_OPRNDS1 may be null or
   empty for unary operations.  NOTE(review): braces lost in extraction.  */
1615 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1616 vec
<tree
> *vec_oprnds0
,
1617 vec
<tree
> *vec_oprnds1
)
1619 tree vec_oprnd
= vec_oprnds0
->pop ();
1621 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1622 vec_oprnds0
->quick_push (vec_oprnd
);
/* Second operand, if present.  */
1624 if (vec_oprnds1
&& vec_oprnds1
->length ())
1626 vec_oprnd
= vec_oprnds1
->pop ();
1627 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1628 vec_oprnds1
->quick_push (vec_oprnd
);
1633 /* Get vectorized definitions for OP0 and OP1.
1634 REDUC_INDEX is the index of reduction operand in case of reduction,
1635 and -1 otherwise. */
/* NOTE(review): extraction gaps here (the if (slp_node) split between the
   SLP path and the non-SLP path, braces) — tokens kept verbatim.
   SLP path: collect defs for all operands at once via vect_get_slp_defs.
   Non-SLP path: one def per operand via vect_get_vec_def_for_operand.  */
1638 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1639 vec
<tree
> *vec_oprnds0
,
1640 vec
<tree
> *vec_oprnds1
,
1641 slp_tree slp_node
, int reduc_index
)
1645 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1646 auto_vec
<tree
> ops (nops
);
1647 auto_vec
<vec
<tree
> > vec_defs (nops
);
1649 ops
.quick_push (op0
);
1651 ops
.quick_push (op1
);
1653 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
/* Hand the collected SLP defs back through the out-parameters.  */
1655 *vec_oprnds0
= vec_defs
[0];
1657 *vec_oprnds1
= vec_defs
[1];
/* Non-SLP: single vector def per operand.  */
1663 vec_oprnds0
->create (1);
1664 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1665 vec_oprnds0
->quick_push (vec_oprnd
);
1669 vec_oprnds1
->create (1);
1670 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1671 vec_oprnds1
->quick_push (vec_oprnd
);
1677 /* Function vect_finish_stmt_generation.
1679 Insert a new stmt. */
/* Inserts VEC_STMT before *GSI, registers stmt_vec_info for it, fixes up
   virtual SSA operands, copies STMT's location, and keeps EH region
   membership consistent.  NOTE(review): extraction gaps (braces, the tail
   of the new_stmt_vec_info call) — tokens kept verbatim.  */
1682 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1683 gimple_stmt_iterator
*gsi
)
1685 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1686 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1687 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1689 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
/* If VEC_STMT touches memory, wire up its virtual operands from the stmt
   currently at the insertion point to keep virtual SSA form valid.  */
1691 if (!gsi_end_p (*gsi
)
1692 && gimple_has_mem_ops (vec_stmt
))
1694 gimple at_stmt
= gsi_stmt (*gsi
);
1695 tree vuse
= gimple_vuse (at_stmt
);
1696 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1698 tree vdef
= gimple_vdef (at_stmt
);
1699 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1700 /* If we have an SSA vuse and insert a store, update virtual
1701 SSA form to avoid triggering the renamer. Do so only
1702 if we can easily see all uses - which is what almost always
1703 happens with the way vectorized stmts are inserted. */
1704 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1705 && ((is_gimple_assign (vec_stmt
)
1706 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1707 || (is_gimple_call (vec_stmt
)
1708 && !(gimple_call_flags (vec_stmt
)
1709 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1711 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1712 gimple_set_vdef (vec_stmt
, new_vdef
);
1713 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1717 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
/* Register vectorizer bookkeeping for the new stmt.  */
1719 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1722 if (dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1725 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1728 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1730 /* While EH edges will generally prevent vectorization, stmt might
1731 e.g. be in a must-not-throw region. Ensure newly created stmts
1732 that could throw are part of the same region. */
1733 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1734 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1735 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1738 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1739 a function declaration if the target has a vectorized version
1740 of the function, or NULL_TREE if the function cannot be vectorized. */
/* NOTE(review): extraction gaps (return NULL_TREE paths, the start of the
   fndecl null-check condition, the trailing vectype_in argument)
   — tokens kept verbatim.  Defers the real decision to the target hook
   builtin_vectorized_function; only const/novops built-ins qualify.  */
1743 vectorizable_function (gcall
*call
, tree vectype_out
, tree vectype_in
)
1745 tree fndecl
= gimple_call_fndecl (call
);
1747 /* We only handle functions that do not read or clobber memory -- i.e.
1748 const or novops ones. */
1749 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1753 || TREE_CODE (fndecl
) != FUNCTION_DECL
1754 || !DECL_BUILT_IN (fndecl
))
1757 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
/* Forward declaration; used by vectorizable_mask_load_store below,
   defined later in the file.  */
1762 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
1763 gimple_stmt_iterator
*);
1766 /* Function vectorizable_mask_load_store.
1768 Check if STMT performs a conditional load or store that can be vectorized.
1769 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1770 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1771 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): this block has many lines dropped by the extraction
   (braces, early returns, several declarations, statement heads) —
   all remaining tokens are kept verbatim.  Structure, as far as visible:
   analysis/legality checks first, then (when vec_stmt is set) one of
   three transform paths — masked gather, IFN_MASK_STORE, IFN_MASK_LOAD.  */
1774 vectorizable_mask_load_store (gimple stmt
, gimple_stmt_iterator
*gsi
,
1775 gimple
*vec_stmt
, slp_tree slp_node
)
1777 tree vec_dest
= NULL
;
1778 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1779 stmt_vec_info prev_stmt_info
;
1780 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1781 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1782 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1783 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1784 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1788 tree dataref_ptr
= NULL_TREE
;
1790 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1794 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1795 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1796 int gather_scale
= 1;
1797 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1802 enum vect_def_type dt
;
1804 if (slp_node
!= NULL
)
1807 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1808 gcc_assert (ncopies
>= 1);
/* The mask is always call argument 2 for both IFN_MASK_LOAD and
   IFN_MASK_STORE; its precision must match the vector element mode.  */
1810 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1811 mask
= gimple_call_arg (stmt
, 2);
1812 if (TYPE_PRECISION (TREE_TYPE (mask
))
1813 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))))
1816 /* FORNOW. This restriction should be relaxed. */
1817 if (nested_in_vect_loop
&& ncopies
> 1)
1819 if (dump_enabled_p ())
1820 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1821 "multiple types in nested loop.");
1825 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1828 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1831 if (!STMT_VINFO_DATA_REF (stmt_info
))
1834 elem_type
= TREE_TYPE (vectype
);
1836 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1839 if (STMT_VINFO_STRIDED_P (stmt_info
))
/* Gather legality: the target gather builtin must exist and the offset
   operand must have a simple (vectorizable) definition.  */
1842 if (STMT_VINFO_GATHER_P (stmt_info
))
1846 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
1847 &gather_off
, &gather_scale
);
1848 gcc_assert (gather_decl
);
1849 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, NULL
,
1850 &def_stmt
, &def
, &gather_dt
,
1851 &gather_off_vectype
))
1853 if (dump_enabled_p ())
1854 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1855 "gather index use not simple.");
1859 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
))
;
1861 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1862 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1864 if (dump_enabled_p ())
1865 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1866 "masked gather with integer mask not supported.");
1870 else if (tree_int_cst_compare (nested_in_vect_loop
1871 ? STMT_VINFO_DR_STEP (stmt_info
)
1872 : DR_STEP (dr
), size_zero_node
) <= 0)
1874 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1875 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
), !is_store
))
1878 if (TREE_CODE (mask
) != SSA_NAME
)
1881 if (!vect_is_simple_use (mask
, stmt
, loop_vinfo
, NULL
,
1882 &def_stmt
, &def
, &dt
))
/* For a masked store, the stored value is call argument 3.  */
1887 tree rhs
= gimple_call_arg (stmt
, 3);
1888 if (!vect_is_simple_use (rhs
, stmt
, loop_vinfo
, NULL
,
1889 &def_stmt
, &def
, &dt
))
/* Analysis-only invocation: record the decision and the cost model.  */
1893 if (!vec_stmt
) /* transformation not required. */
1895 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1897 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1900 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
/* ---- Transform path 1: masked gather load.  ---- */
1906 if (STMT_VINFO_GATHER_P (stmt_info
))
1908 tree vec_oprnd0
= NULL_TREE
, op
;
1909 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1910 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1911 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1912 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1913 tree mask_perm_mask
= NULL_TREE
;
1914 edge pe
= loop_preheader_edge (loop
);
1917 enum { NARROW
, NONE
, WIDEN
} modifier
;
1918 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
/* Decompose the gather builtin's prototype: (src, ptr, idx, mask, scale).  */
1920 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1921 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1922 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1923 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1924 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1925 scaletype
= TREE_VALUE (arglist
);
1926 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1927 && types_compatible_p (srctype
, masktype
));
/* Choose modifier and build permute selectors to bridge a mismatch
   between the data vector width and the offset vector width.  */
1929 if (nunits
== gather_off_nunits
)
1931 else if (nunits
== gather_off_nunits
/ 2)
1933 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1936 for (i
= 0; i
< gather_off_nunits
; ++i
)
1937 sel
[i
] = i
| nunits
;
1939 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
1941 else if (nunits
== gather_off_nunits
* 2)
1943 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1946 for (i
= 0; i
< nunits
; ++i
)
1947 sel
[i
] = i
< gather_off_nunits
1948 ? i
: i
+ nunits
- gather_off_nunits
;
1950 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
1952 for (i
= 0; i
< nunits
; ++i
)
1953 sel
[i
] = i
| gather_off_nunits
;
1954 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
1959 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
/* Materialize the invariant base pointer on the preheader edge.  */
1961 ptr
= fold_convert (ptrtype
, gather_base
);
1962 if (!is_gimple_min_invariant (ptr
))
1964 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1965 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1966 gcc_assert (!new_bb
);
1969 scale
= build_int_cst (scaletype
, gather_scale
);
/* Emit one gather call per copy, chaining copies via RELATED_STMT.  */
1971 prev_stmt_info
= NULL
;
1972 for (j
= 0; j
< ncopies
; ++j
)
1974 if (modifier
== WIDEN
&& (j
& 1))
1975 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1976 perm_mask
, stmt
, gsi
);
1979 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
1982 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
/* View-convert the offset vector to the builtin's index type if the
   types differ only in representation.  */
1984 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1986 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1987 == TYPE_VECTOR_SUBPARTS (idxtype
));
1988 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
1989 var
= make_ssa_name (var
);
1990 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1992 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1993 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1997 if (mask_perm_mask
&& (j
& 1))
1998 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1999 mask_perm_mask
, stmt
, gsi
);
2003 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2006 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
,
2007 &def_stmt
, &def
, &dt
);
2008 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
/* Likewise view-convert the mask to the builtin's mask type.  */
2012 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2014 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2015 == TYPE_VECTOR_SUBPARTS (masktype
));
2016 var
= vect_get_new_vect_var (masktype
, vect_simple_var
,
2018 var
= make_ssa_name (var
);
2019 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2021 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2022 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* The actual gather: mask is both the source (pass-through) and the
   mask argument of the 5-operand builtin.  */
2028 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
/* View-convert the gather result back to VECTYPE when needed.  */
2031 if (!useless_type_conversion_p (vectype
, rettype
))
2033 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2034 == TYPE_VECTOR_SUBPARTS (rettype
));
2035 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
2036 op
= make_ssa_name (var
, new_stmt
);
2037 gimple_call_set_lhs (new_stmt
, op
);
2038 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2039 var
= make_ssa_name (vec_dest
);
2040 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2041 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2045 var
= make_ssa_name (vec_dest
, new_stmt
);
2046 gimple_call_set_lhs (new_stmt
, var
);
2049 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* NARROW: combine two half-width gather results with a permute.  */
2051 if (modifier
== NARROW
)
2058 var
= permute_vec_elements (prev_res
, var
,
2059 perm_mask
, stmt
, gsi
);
2060 new_stmt
= SSA_NAME_DEF_STMT (var
);
2063 if (prev_stmt_info
== NULL
)
2064 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2066 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2067 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2070 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
/* Replace the scalar call with a dead zero-assignment to its LHS.  */
2072 tree lhs
= gimple_call_lhs (stmt
);
2073 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2074 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2075 set_vinfo_for_stmt (stmt
, NULL
);
2076 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2077 gsi_replace (gsi
, new_stmt
, true);
/* ---- Transform path 2: masked store (IFN_MASK_STORE).  ---- */
2082 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2083 prev_stmt_info
= NULL
;
2084 for (i
= 0; i
< ncopies
; i
++)
2086 unsigned align
, misalign
;
/* First copy: fetch initial defs and create the data-ref pointer.  */
2090 tree rhs
= gimple_call_arg (stmt
, 3);
2091 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
, NULL
);
2092 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2093 /* We should have catched mismatched types earlier. */
2094 gcc_assert (useless_type_conversion_p (vectype
,
2095 TREE_TYPE (vec_rhs
)));
2096 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2097 NULL_TREE
, &dummy
, gsi
,
2098 &ptr_incr
, false, &inv_p
);
2099 gcc_assert (!inv_p
);
/* Later copies: advance defs and bump the pointer by one vector.  */
2103 vect_is_simple_use (vec_rhs
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2105 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2106 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2108 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2109 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2110 TYPE_SIZE_UNIT (vectype
));
/* Record pointer alignment so expansion can pick the right insn.  */
2113 align
= TYPE_ALIGN_UNIT (vectype
);
2114 if (aligned_access_p (dr
))
2116 else if (DR_MISALIGNMENT (dr
) == -1)
2118 align
= TYPE_ALIGN_UNIT (elem_type
);
2122 misalign
= DR_MISALIGNMENT (dr
);
2123 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2126 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2127 gimple_call_arg (stmt
, 1),
2129 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2131 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2133 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2134 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* ---- Transform path 3: masked load (IFN_MASK_LOAD).  ---- */
2139 tree vec_mask
= NULL_TREE
;
2140 prev_stmt_info
= NULL
;
2141 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2142 for (i
= 0; i
< ncopies
; i
++)
2144 unsigned align
, misalign
;
2148 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2149 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2150 NULL_TREE
, &dummy
, gsi
,
2151 &ptr_incr
, false, &inv_p
);
2152 gcc_assert (!inv_p
);
2156 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2158 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2159 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2160 TYPE_SIZE_UNIT (vectype
));
2163 align
= TYPE_ALIGN_UNIT (vectype
);
2164 if (aligned_access_p (dr
))
2166 else if (DR_MISALIGNMENT (dr
) == -1)
2168 align
= TYPE_ALIGN_UNIT (elem_type
);
2172 misalign
= DR_MISALIGNMENT (dr
);
2173 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2176 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2177 gimple_call_arg (stmt
, 1),
2179 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2180 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2182 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2184 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2185 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2191 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
/* Same dead-zero replacement of the scalar call as on the gather path.  */
2193 tree lhs
= gimple_call_lhs (stmt
);
2194 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2195 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2196 set_vinfo_for_stmt (stmt
, NULL
);
2197 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2198 gsi_replace (gsi
, new_stmt
, true);
2205 /* Function vectorizable_call.
2207 Check if GS performs a function call that can be vectorized.
2208 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2209 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2210 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2213 vectorizable_call (gimple gs
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
2220 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2221 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2222 tree vectype_out
, vectype_in
;
2225 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2226 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2227 tree fndecl
, new_temp
, def
, rhs_type
;
2229 enum vect_def_type dt
[3]
2230 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2231 gimple new_stmt
= NULL
;
2233 vec
<tree
> vargs
= vNULL
;
2234 enum { NARROW
, NONE
, WIDEN
} modifier
;
2238 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2241 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2244 /* Is GS a vectorizable call? */
2245 stmt
= dyn_cast
<gcall
*> (gs
);
2249 if (gimple_call_internal_p (stmt
)
2250 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2251 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2252 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2255 if (gimple_call_lhs (stmt
) == NULL_TREE
2256 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2259 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2261 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2263 /* Process function arguments. */
2264 rhs_type
= NULL_TREE
;
2265 vectype_in
= NULL_TREE
;
2266 nargs
= gimple_call_num_args (stmt
);
2268 /* Bail out if the function has more than three arguments, we do not have
2269 interesting builtin functions to vectorize with more than two arguments
2270 except for fma. No arguments is also not good. */
2271 if (nargs
== 0 || nargs
> 3)
2274 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2275 if (gimple_call_internal_p (stmt
)
2276 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2279 rhs_type
= unsigned_type_node
;
2282 for (i
= 0; i
< nargs
; i
++)
2286 op
= gimple_call_arg (stmt
, i
);
2288 /* We can only handle calls with arguments of the same type. */
2290 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2294 "argument types differ.\n");
2298 rhs_type
= TREE_TYPE (op
);
2300 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2301 &def_stmt
, &def
, &dt
[i
], &opvectype
))
2303 if (dump_enabled_p ())
2304 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2305 "use not simple.\n");
2310 vectype_in
= opvectype
;
2312 && opvectype
!= vectype_in
)
2314 if (dump_enabled_p ())
2315 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2316 "argument vector types differ.\n");
2320 /* If all arguments are external or constant defs use a vector type with
2321 the same size as the output vector type. */
2323 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2325 gcc_assert (vectype_in
);
2328 if (dump_enabled_p ())
2330 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2331 "no vectype for scalar type ");
2332 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2333 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2340 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2341 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2342 if (nunits_in
== nunits_out
/ 2)
2344 else if (nunits_out
== nunits_in
)
2346 else if (nunits_out
== nunits_in
/ 2)
2351 /* For now, we only vectorize functions if a target specific builtin
2352 is available. TODO -- in some cases, it might be profitable to
2353 insert the calls for pieces of the vector, in order to be able
2354 to vectorize other operations in the loop. */
2355 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
2356 if (fndecl
== NULL_TREE
)
2358 if (gimple_call_internal_p (stmt
)
2359 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
2362 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2363 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2364 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2365 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2367 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2368 { 0, 1, 2, ... vf - 1 } vector. */
2369 gcc_assert (nargs
== 0);
2373 if (dump_enabled_p ())
2374 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2375 "function is not vectorizable.\n");
2380 gcc_assert (!gimple_vuse (stmt
));
2382 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2384 else if (modifier
== NARROW
)
2385 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2387 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2389 /* Sanity check: make sure that at least one copy of the vectorized stmt
2390 needs to be generated. */
2391 gcc_assert (ncopies
>= 1);
2393 if (!vec_stmt
) /* transformation not required. */
2395 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2396 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2399 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2409 scalar_dest
= gimple_call_lhs (stmt
);
2410 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2412 prev_stmt_info
= NULL
;
2416 for (j
= 0; j
< ncopies
; ++j
)
2418 /* Build argument list for the vectorized call. */
2420 vargs
.create (nargs
);
2426 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2427 vec
<tree
> vec_oprnds0
;
2429 for (i
= 0; i
< nargs
; i
++)
2430 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2431 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2432 vec_oprnds0
= vec_defs
[0];
2434 /* Arguments are ready. Create the new vector stmt. */
2435 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2438 for (k
= 0; k
< nargs
; k
++)
2440 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2441 vargs
[k
] = vec_oprndsk
[i
];
2443 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2444 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2445 gimple_call_set_lhs (new_stmt
, new_temp
);
2446 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2447 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2450 for (i
= 0; i
< nargs
; i
++)
2452 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2453 vec_oprndsi
.release ();
2458 for (i
= 0; i
< nargs
; i
++)
2460 op
= gimple_call_arg (stmt
, i
);
2463 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2466 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2468 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2471 vargs
.quick_push (vec_oprnd0
);
2474 if (gimple_call_internal_p (stmt
)
2475 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2477 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2479 for (k
= 0; k
< nunits_out
; ++k
)
2480 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2481 tree cst
= build_vector (vectype_out
, v
);
2483 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
2484 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
2485 new_temp
= make_ssa_name (new_var
, init_stmt
);
2486 gimple_assign_set_lhs (init_stmt
, new_temp
);
2487 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2488 new_temp
= make_ssa_name (vec_dest
);
2489 new_stmt
= gimple_build_assign (new_temp
,
2490 gimple_assign_lhs (init_stmt
));
2494 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2495 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2496 gimple_call_set_lhs (new_stmt
, new_temp
);
2498 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2501 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2503 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2505 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2511 for (j
= 0; j
< ncopies
; ++j
)
2513 /* Build argument list for the vectorized call. */
2515 vargs
.create (nargs
* 2);
2521 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2522 vec
<tree
> vec_oprnds0
;
2524 for (i
= 0; i
< nargs
; i
++)
2525 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2526 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2527 vec_oprnds0
= vec_defs
[0];
2529 /* Arguments are ready. Create the new vector stmt. */
2530 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2534 for (k
= 0; k
< nargs
; k
++)
2536 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2537 vargs
.quick_push (vec_oprndsk
[i
]);
2538 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2540 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2541 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2542 gimple_call_set_lhs (new_stmt
, new_temp
);
2543 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2544 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2547 for (i
= 0; i
< nargs
; i
++)
2549 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2550 vec_oprndsi
.release ();
2555 for (i
= 0; i
< nargs
; i
++)
2557 op
= gimple_call_arg (stmt
, i
);
2561 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2563 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2567 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2569 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2571 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2574 vargs
.quick_push (vec_oprnd0
);
2575 vargs
.quick_push (vec_oprnd1
);
2578 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2579 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2580 gimple_call_set_lhs (new_stmt
, new_temp
);
2581 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2584 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2586 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2588 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2591 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2596 /* No current target implements this case. */
2602 /* The call in STMT might prevent it from being removed in dce.
2603 We however cannot remove it here, due to the way the ssa name
2604 it defines is mapped to the new definition. So just replace
2605 rhs of the statement with something harmless. */
2610 type
= TREE_TYPE (scalar_dest
);
2611 if (is_pattern_stmt_p (stmt_info
))
2612 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2614 lhs
= gimple_call_lhs (stmt
);
2615 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2616 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2617 set_vinfo_for_stmt (stmt
, NULL
);
2618 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2619 gsi_replace (gsi
, new_stmt
, false);
/* Per-call-argument information gathered by vectorizable_simd_clone_call.
   NOTE(review): this listing is an extraction with gaps -- original lines
   2626-2628 and 2631-2633 are absent.  Later code reads thisarginfo.vectype,
   thisarginfo.op and thisarginfo.align, so those members (and the struct's
   braces) presumably live in the missing lines -- TODO confirm against the
   original file.  */
2625 struct simd_call_arg_info
/* How the argument is defined (constant, external, internal def, ...).  */
2629 enum vect_def_type dt
;
/* Step of a linear argument; zero when the argument is not linear.  */
2630 HOST_WIDE_INT linear_step
;
2634 /* Function vectorizable_simd_clone_call.
2636 Check if STMT performs a function call that can be vectorized
2637 by calling a simd clone of the function.
2638 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2639 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2640 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2643 vectorizable_simd_clone_call (gimple stmt
, gimple_stmt_iterator
*gsi
,
2644 gimple
*vec_stmt
, slp_tree slp_node
)
2649 tree vec_oprnd0
= NULL_TREE
;
2650 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2652 unsigned int nunits
;
2653 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2654 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2655 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2656 tree fndecl
, new_temp
, def
;
2658 gimple new_stmt
= NULL
;
2660 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2661 vec
<tree
> vargs
= vNULL
;
2663 tree lhs
, rtype
, ratype
;
2664 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2666 /* Is STMT a vectorizable call? */
2667 if (!is_gimple_call (stmt
))
2670 fndecl
= gimple_call_fndecl (stmt
);
2671 if (fndecl
== NULL_TREE
)
2674 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2675 if (node
== NULL
|| node
->simd_clones
== NULL
)
2678 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2681 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2684 if (gimple_call_lhs (stmt
)
2685 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2688 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2690 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2692 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2696 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2699 /* Process function arguments. */
2700 nargs
= gimple_call_num_args (stmt
);
2702 /* Bail out if the function has zero arguments. */
2706 arginfo
.create (nargs
);
2708 for (i
= 0; i
< nargs
; i
++)
2710 simd_call_arg_info thisarginfo
;
2713 thisarginfo
.linear_step
= 0;
2714 thisarginfo
.align
= 0;
2715 thisarginfo
.op
= NULL_TREE
;
2717 op
= gimple_call_arg (stmt
, i
);
2718 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2719 &def_stmt
, &def
, &thisarginfo
.dt
,
2720 &thisarginfo
.vectype
)
2721 || thisarginfo
.dt
== vect_uninitialized_def
)
2723 if (dump_enabled_p ())
2724 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2725 "use not simple.\n");
2730 if (thisarginfo
.dt
== vect_constant_def
2731 || thisarginfo
.dt
== vect_external_def
)
2732 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2734 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2736 /* For linear arguments, the analyze phase should have saved
2737 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2738 if (i
* 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
2739 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2])
2741 gcc_assert (vec_stmt
);
2742 thisarginfo
.linear_step
2743 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2]);
2745 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 1];
2746 /* If loop has been peeled for alignment, we need to adjust it. */
2747 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
2748 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
2751 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
2752 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2];
2753 tree opt
= TREE_TYPE (thisarginfo
.op
);
2754 bias
= fold_convert (TREE_TYPE (step
), bias
);
2755 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
2757 = fold_build2 (POINTER_TYPE_P (opt
)
2758 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
2759 thisarginfo
.op
, bias
);
2763 && thisarginfo
.dt
!= vect_constant_def
2764 && thisarginfo
.dt
!= vect_external_def
2766 && TREE_CODE (op
) == SSA_NAME
2767 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
2769 && tree_fits_shwi_p (iv
.step
))
2771 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2772 thisarginfo
.op
= iv
.base
;
2774 else if ((thisarginfo
.dt
== vect_constant_def
2775 || thisarginfo
.dt
== vect_external_def
)
2776 && POINTER_TYPE_P (TREE_TYPE (op
)))
2777 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2779 arginfo
.quick_push (thisarginfo
);
2782 unsigned int badness
= 0;
2783 struct cgraph_node
*bestn
= NULL
;
2784 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
2785 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
2787 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2788 n
= n
->simdclone
->next_clone
)
2790 unsigned int this_badness
= 0;
2791 if (n
->simdclone
->simdlen
2792 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2793 || n
->simdclone
->nargs
!= nargs
)
2795 if (n
->simdclone
->simdlen
2796 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2797 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2798 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2799 if (n
->simdclone
->inbranch
)
2800 this_badness
+= 2048;
2801 int target_badness
= targetm
.simd_clone
.usable (n
);
2802 if (target_badness
< 0)
2804 this_badness
+= target_badness
* 512;
2805 /* FORNOW: Have to add code to add the mask argument. */
2806 if (n
->simdclone
->inbranch
)
2808 for (i
= 0; i
< nargs
; i
++)
2810 switch (n
->simdclone
->args
[i
].arg_type
)
2812 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2813 if (!useless_type_conversion_p
2814 (n
->simdclone
->args
[i
].orig_type
,
2815 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2817 else if (arginfo
[i
].dt
== vect_constant_def
2818 || arginfo
[i
].dt
== vect_external_def
2819 || arginfo
[i
].linear_step
)
2822 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2823 if (arginfo
[i
].dt
!= vect_constant_def
2824 && arginfo
[i
].dt
!= vect_external_def
)
2827 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2828 if (arginfo
[i
].dt
== vect_constant_def
2829 || arginfo
[i
].dt
== vect_external_def
2830 || (arginfo
[i
].linear_step
2831 != n
->simdclone
->args
[i
].linear_step
))
2834 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2838 case SIMD_CLONE_ARG_TYPE_MASK
:
2841 if (i
== (size_t) -1)
2843 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2848 if (arginfo
[i
].align
)
2849 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2850 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2852 if (i
== (size_t) -1)
2854 if (bestn
== NULL
|| this_badness
< badness
)
2857 badness
= this_badness
;
2867 for (i
= 0; i
< nargs
; i
++)
2868 if ((arginfo
[i
].dt
== vect_constant_def
2869 || arginfo
[i
].dt
== vect_external_def
)
2870 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
2873 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
2875 if (arginfo
[i
].vectype
== NULL
2876 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2877 > bestn
->simdclone
->simdlen
))
2884 fndecl
= bestn
->decl
;
2885 nunits
= bestn
->simdclone
->simdlen
;
2886 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2888 /* If the function isn't const, only allow it in simd loops where user
2889 has asserted that at least nunits consecutive iterations can be
2890 performed using SIMD instructions. */
2891 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
2892 && gimple_vuse (stmt
))
2898 /* Sanity check: make sure that at least one copy of the vectorized stmt
2899 needs to be generated. */
2900 gcc_assert (ncopies
>= 1);
2902 if (!vec_stmt
) /* transformation not required. */
2904 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
2905 for (i
= 0; i
< nargs
; i
++)
2906 if (bestn
->simdclone
->args
[i
].arg_type
2907 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
2909 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 2
2911 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
2912 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
2913 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
2914 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
2915 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
2917 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
2918 if (dump_enabled_p ())
2919 dump_printf_loc (MSG_NOTE
, vect_location
,
2920 "=== vectorizable_simd_clone_call ===\n");
2921 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2928 if (dump_enabled_p ())
2929 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2932 scalar_dest
= gimple_call_lhs (stmt
);
2933 vec_dest
= NULL_TREE
;
2938 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2939 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
2940 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
2943 rtype
= TREE_TYPE (ratype
);
2947 prev_stmt_info
= NULL
;
2948 for (j
= 0; j
< ncopies
; ++j
)
2950 /* Build argument list for the vectorized call. */
2952 vargs
.create (nargs
);
2956 for (i
= 0; i
< nargs
; i
++)
2958 unsigned int k
, l
, m
, o
;
2960 op
= gimple_call_arg (stmt
, i
);
2961 switch (bestn
->simdclone
->args
[i
].arg_type
)
2963 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2964 atype
= bestn
->simdclone
->args
[i
].vector_type
;
2965 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
2966 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
2968 if (TYPE_VECTOR_SUBPARTS (atype
)
2969 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
2971 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
2972 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2973 / TYPE_VECTOR_SUBPARTS (atype
));
2974 gcc_assert ((k
& (k
- 1)) == 0);
2977 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2980 vec_oprnd0
= arginfo
[i
].op
;
2981 if ((m
& (k
- 1)) == 0)
2983 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
2986 arginfo
[i
].op
= vec_oprnd0
;
2988 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
2990 bitsize_int ((m
& (k
- 1)) * prec
));
2992 = gimple_build_assign (make_ssa_name (atype
),
2994 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2995 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
2999 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3000 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3001 gcc_assert ((k
& (k
- 1)) == 0);
3002 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3004 vec_alloc (ctor_elts
, k
);
3007 for (l
= 0; l
< k
; l
++)
3009 if (m
== 0 && l
== 0)
3011 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
3014 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3016 arginfo
[i
].op
= vec_oprnd0
;
3019 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3023 vargs
.safe_push (vec_oprnd0
);
3026 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3028 = gimple_build_assign (make_ssa_name (atype
),
3030 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3031 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3036 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3037 vargs
.safe_push (op
);
3039 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3044 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3049 edge pe
= loop_preheader_edge (loop
);
3050 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3051 gcc_assert (!new_bb
);
3053 tree phi_res
= copy_ssa_name (op
);
3054 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3055 set_vinfo_for_stmt (new_phi
,
3056 new_stmt_vec_info (new_phi
, loop_vinfo
,
3058 add_phi_arg (new_phi
, arginfo
[i
].op
,
3059 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3061 = POINTER_TYPE_P (TREE_TYPE (op
))
3062 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3063 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3064 ? sizetype
: TREE_TYPE (op
);
3066 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3068 tree tcst
= wide_int_to_tree (type
, cst
);
3069 tree phi_arg
= copy_ssa_name (op
);
3071 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3072 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3073 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3074 set_vinfo_for_stmt (new_stmt
,
3075 new_stmt_vec_info (new_stmt
, loop_vinfo
,
3077 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3079 arginfo
[i
].op
= phi_res
;
3080 vargs
.safe_push (phi_res
);
3085 = POINTER_TYPE_P (TREE_TYPE (op
))
3086 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3087 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3088 ? sizetype
: TREE_TYPE (op
);
3090 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3092 tree tcst
= wide_int_to_tree (type
, cst
);
3093 new_temp
= make_ssa_name (TREE_TYPE (op
));
3094 new_stmt
= gimple_build_assign (new_temp
, code
,
3095 arginfo
[i
].op
, tcst
);
3096 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3097 vargs
.safe_push (new_temp
);
3100 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3106 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3109 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3111 new_temp
= create_tmp_var (ratype
);
3112 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3113 == TYPE_VECTOR_SUBPARTS (rtype
))
3114 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3116 new_temp
= make_ssa_name (rtype
, new_stmt
);
3117 gimple_call_set_lhs (new_stmt
, new_temp
);
3119 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3123 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3126 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3127 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3128 gcc_assert ((k
& (k
- 1)) == 0);
3129 for (l
= 0; l
< k
; l
++)
3134 t
= build_fold_addr_expr (new_temp
);
3135 t
= build2 (MEM_REF
, vectype
, t
,
3136 build_int_cst (TREE_TYPE (t
),
3137 l
* prec
/ BITS_PER_UNIT
));
3140 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3141 size_int (prec
), bitsize_int (l
* prec
));
3143 = gimple_build_assign (make_ssa_name (vectype
), t
);
3144 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3145 if (j
== 0 && l
== 0)
3146 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3148 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3150 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3155 tree clobber
= build_constructor (ratype
, NULL
);
3156 TREE_THIS_VOLATILE (clobber
) = 1;
3157 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3158 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3162 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3164 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3165 / TYPE_VECTOR_SUBPARTS (rtype
));
3166 gcc_assert ((k
& (k
- 1)) == 0);
3167 if ((j
& (k
- 1)) == 0)
3168 vec_alloc (ret_ctor_elts
, k
);
3171 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3172 for (m
= 0; m
< o
; m
++)
3174 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3175 size_int (m
), NULL_TREE
, NULL_TREE
);
3177 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3178 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3179 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3180 gimple_assign_lhs (new_stmt
));
3182 tree clobber
= build_constructor (ratype
, NULL
);
3183 TREE_THIS_VOLATILE (clobber
) = 1;
3184 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3185 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3188 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3189 if ((j
& (k
- 1)) != k
- 1)
3191 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3193 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3194 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3196 if ((unsigned) j
== k
- 1)
3197 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3199 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3201 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3206 tree t
= build_fold_addr_expr (new_temp
);
3207 t
= build2 (MEM_REF
, vectype
, t
,
3208 build_int_cst (TREE_TYPE (t
), 0));
3210 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3211 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3212 tree clobber
= build_constructor (ratype
, NULL
);
3213 TREE_THIS_VOLATILE (clobber
) = 1;
3214 vect_finish_stmt_generation (stmt
,
3215 gimple_build_assign (new_temp
,
3221 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3223 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3225 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3230 /* The call in STMT might prevent it from being removed in dce.
3231 We however cannot remove it here, due to the way the ssa name
3232 it defines is mapped to the new definition. So just replace
3233 rhs of the statement with something harmless. */
3240 type
= TREE_TYPE (scalar_dest
);
3241 if (is_pattern_stmt_p (stmt_info
))
3242 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3244 lhs
= gimple_call_lhs (stmt
);
3245 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3248 new_stmt
= gimple_build_nop ();
3249 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3250 set_vinfo_for_stmt (stmt
, NULL
);
3251 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3252 gsi_replace (gsi
, new_stmt
, true);
3253 unlink_stmt_vdef (stmt
);
3259 /* Function vect_gen_widened_results_half
3261 Create a vector stmt whose code, type, number of arguments, and result
3262 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3263 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3264 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3265 needs to be created (DECL is a function-decl of a target-builtin).
3266 STMT is the original scalar stmt that we are vectorizing. */
/* NOTE(review): extraction gaps -- original lines 3267-3268, 3270,
   3273-3277, 3280, 3284, 3288-3290, 3294, 3298 and 3300-3304 are absent.
   The body uses "decl", "stmt", "new_stmt" and "new_temp" that are not
   declared in the visible text, so the missing lines presumably hold the
   return type, the DECL/STMT parameters, local declarations, braces/else
   and a final "return new_stmt;" -- TODO confirm against the original
   file.  */
3269 vect_gen_widened_results_half (enum tree_code code
,
3271 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3272 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3278 /* Generate half of the widened result: */
/* CALL_EXPR means the half is produced by a target builtin call.  */
3279 if (code
== CALL_EXPR
)
3281 /* Target specific support */
/* Binary widening builtins take both operands, unary ones only the
   first.  */
3282 if (op_type
== binary_op
)
3283 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3285 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
/* Give the call a fresh SSA result based on VEC_DEST.  */
3286 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3287 gimple_call_set_lhs (new_stmt
, new_temp
);
3291 /* Generic support */
/* A plain tree code's arity must match TREE_CODE_LENGTH; missing line
   3294 presumably clears VEC_OPRND1 for unary codes -- TODO confirm.  */
3292 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3293 if (op_type
!= binary_op
)
3295 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3296 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3297 gimple_assign_set_lhs (new_stmt
, new_temp
);
/* Insert the new vector stmt at GSI, in STMT's position.  */
3299 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3305 /* Get vectorized definitions for loop-based vectorization. For the first
3306 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3307 scalar operand), and for the rest we get a copy with
3308 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3309 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3310 The vectors are collected into VEC_OPRNDS. */
/* NOTE(review): extraction gaps -- original lines 3311-3312, 3315-3317,
   3320, 3323, 3325, 3327, 3331-3336 and 3338 are absent.  "vec_oprnd" is
   used below without a visible declaration, and the recursive call below
   presumably sits under an "if (multi_step_cvt)" guard that is among the
   missing lines -- TODO confirm against the original file.  */
3313 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
3314 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3318 /* Get first vector operand. */
3319 /* All the vector operands except the very first one (that is scalar oprnd)
/* A non-vector *OPRND is still the original scalar: take its vector def.
   Otherwise *OPRND already holds the previous vector def and we copy it.  */
3321 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3322 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
3324 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3326 vec_oprnds
->quick_push (vec_oprnd
);
3328 /* Get second vector operand. */
3329 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3330 vec_oprnds
->quick_push (vec_oprnd
);
3334 /* For conversion in multiple steps, continue to get operands
/* Recurse with one fewer conversion step remaining.  */
3337 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3341 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3342 For multi-step conversions store the resulting vectors and call the function
/* NOTE(review): extraction gaps -- original lines 3343-3345, 3348,
   3352-3353, 3355, 3357, 3359, 3361, 3369-3370, 3373-3374, 3378,
   3380-3381, 3384, 3386, 3388-3391, 3395-3397, 3399, 3404-3406 and
   3408-3410 are absent.  "vec_dsts", "i" and "new_stmt" are used below
   without visible declarations; line 3348 presumably declares a
   "vec_dsts" parameter of destination vector types -- TODO confirm
   against the original file.  */
3346 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3347 int multi_step_cvt
, gimple stmt
,
3349 gimple_stmt_iterator
*gsi
,
3350 slp_tree slp_node
, enum tree_code code
,
3351 stmt_vec_info
*prev_stmt_info
)
3354 tree vop0
, vop1
, new_tmp
, vec_dest
;
3356 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* The destination type for this demotion level is the last one pushed.  */
3358 vec_dest
= vec_dsts
.pop ();
/* Pairwise demote: each new vector packs two source vectors.  */
3360 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3362 /* Create demotion operation. */
3363 vop0
= (*vec_oprnds
)[i
];
3364 vop1
= (*vec_oprnds
)[i
+ 1];
3365 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3366 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3367 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3368 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3371 /* Store the resulting vector for next recursive call. */
/* Compact in place: result I/2 replaces the source pair (I, I+1).  */
3372 (*vec_oprnds
)[i
/2] = new_tmp
;
3375 /* This is the last step of the conversion sequence. Store the
3376 vectors in SLP_NODE or in vector info of the scalar statement
3377 (or in STMT_VINFO_RELATED_STMT chain). */
3379 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Loop-based path: chain the copies through STMT_VINFO_RELATED_STMT.  */
3382 if (!*prev_stmt_info
)
3383 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3385 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3387 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3392 /* For multi-step demotion operations we first generate demotion operations
3393 from the source type to the intermediate types, and then combine the
3394 results (stored in VEC_OPRNDS) in demotion operation to the destination
3398 /* At each level of recursion we have half of the operands we had at the
3400 vec_oprnds
->truncate ((i
+1)/2);
/* Recurse on the packed halves; intermediate levels always pack with
   VEC_PACK_TRUNC_EXPR.  */
3401 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3402 stmt
, vec_dsts
, gsi
, slp_node
,
3403 VEC_PACK_TRUNC_EXPR
,
/* Restore VEC_DEST so the caller's recursion level sees it again.  */
3407 vec_dsts
.quick_push (vec_dest
);
3411 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3412 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3413 the resulting vectors and call the function recursively. */
/* NOTE(review): extraction gaps -- original lines 3414-3415, 3423-3424,
   3428, 3431, 3434-3436, 3443, 3446-3448, 3451-3452, 3456-3457 and
   3460-3461 are absent.  The loop counter "i" is used below without a
   visible declaration, and lines 3434-3436 presumably hold the else arm
   for the unary-op case before the halves are generated -- TODO confirm
   against the original file.  */
3416 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3417 vec
<tree
> *vec_oprnds1
,
3418 gimple stmt
, tree vec_dest
,
3419 gimple_stmt_iterator
*gsi
,
3420 enum tree_code code1
,
3421 enum tree_code code2
, tree decl1
,
3422 tree decl2
, int op_type
)
3425 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3426 gimple new_stmt1
, new_stmt2
;
3427 vec
<tree
> vec_tmp
= vNULL
;
/* Each input vector yields two widened result vectors (the two halves),
   hence twice the capacity.  */
3429 vec_tmp
.create (vec_oprnds0
->length () * 2);
3430 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3432 if (op_type
== binary_op
)
3433 vop1
= (*vec_oprnds1
)[i
];
3437 /* Generate the two halves of promotion operation. */
3438 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3439 op_type
, vec_dest
, gsi
, stmt
);
3440 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3441 op_type
, vec_dest
, gsi
, stmt
);
/* The halves may be builtin calls or plain assignments; fetch the lhs
   with the matching accessor.  */
3442 if (is_gimple_call (new_stmt1
))
3444 new_tmp1
= gimple_call_lhs (new_stmt1
);
3445 new_tmp2
= gimple_call_lhs (new_stmt2
);
3449 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3450 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3453 /* Store the results for the next step. */
3454 vec_tmp
.quick_push (new_tmp1
);
3455 vec_tmp
.quick_push (new_tmp2
);
/* Hand the doubled operand list back to the caller via *VEC_OPRNDS0.  */
3458 vec_oprnds0
->release ();
3459 *vec_oprnds0
= vec_tmp
;
3463 /* Check if STMT performs a conversion operation, that can be vectorized.
3464 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3465 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3466 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3469 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
3470 gimple
*vec_stmt
, slp_tree slp_node
)
3474 tree op0
, op1
= NULL_TREE
;
3475 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3476 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3477 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3478 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3479 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3480 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3484 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3485 gimple new_stmt
= NULL
;
3486 stmt_vec_info prev_stmt_info
;
3489 tree vectype_out
, vectype_in
;
3491 tree lhs_type
, rhs_type
;
3492 enum { NARROW
, NONE
, WIDEN
} modifier
;
3493 vec
<tree
> vec_oprnds0
= vNULL
;
3494 vec
<tree
> vec_oprnds1
= vNULL
;
3496 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3497 int multi_step_cvt
= 0;
3498 vec
<tree
> vec_dsts
= vNULL
;
3499 vec
<tree
> interm_types
= vNULL
;
3500 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3502 machine_mode rhs_mode
;
3503 unsigned short fltsz
;
3505 /* Is STMT a vectorizable conversion? */
3507 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3510 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3513 if (!is_gimple_assign (stmt
))
3516 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3519 code
= gimple_assign_rhs_code (stmt
);
3520 if (!CONVERT_EXPR_CODE_P (code
)
3521 && code
!= FIX_TRUNC_EXPR
3522 && code
!= FLOAT_EXPR
3523 && code
!= WIDEN_MULT_EXPR
3524 && code
!= WIDEN_LSHIFT_EXPR
)
3527 op_type
= TREE_CODE_LENGTH (code
);
3529 /* Check types of lhs and rhs. */
3530 scalar_dest
= gimple_assign_lhs (stmt
);
3531 lhs_type
= TREE_TYPE (scalar_dest
);
3532 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3534 op0
= gimple_assign_rhs1 (stmt
);
3535 rhs_type
= TREE_TYPE (op0
);
3537 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3538 && !((INTEGRAL_TYPE_P (lhs_type
)
3539 && INTEGRAL_TYPE_P (rhs_type
))
3540 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3541 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3544 if ((INTEGRAL_TYPE_P (lhs_type
)
3545 && (TYPE_PRECISION (lhs_type
)
3546 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3547 || (INTEGRAL_TYPE_P (rhs_type
)
3548 && (TYPE_PRECISION (rhs_type
)
3549 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
3551 if (dump_enabled_p ())
3552 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3553 "type conversion to/from bit-precision unsupported."
3558 /* Check the operands of the operation. */
3559 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3560 &def_stmt
, &def
, &dt
[0], &vectype_in
))
3562 if (dump_enabled_p ())
3563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3564 "use not simple.\n");
3567 if (op_type
== binary_op
)
3571 op1
= gimple_assign_rhs2 (stmt
);
3572 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3573 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3575 if (CONSTANT_CLASS_P (op0
))
3576 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
3577 &def_stmt
, &def
, &dt
[1], &vectype_in
);
3579 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3584 if (dump_enabled_p ())
3585 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3586 "use not simple.\n");
3591 /* If op0 is an external or constant defs use a vector type of
3592 the same size as the output vector type. */
3594 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3596 gcc_assert (vectype_in
);
3599 if (dump_enabled_p ())
3601 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3602 "no vectype for scalar type ");
3603 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3604 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3610 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3611 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3612 if (nunits_in
< nunits_out
)
3614 else if (nunits_out
== nunits_in
)
3619 /* Multiple types in SLP are handled by creating the appropriate number of
3620 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3622 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3624 else if (modifier
== NARROW
)
3625 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3627 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3629 /* Sanity check: make sure that at least one copy of the vectorized stmt
3630 needs to be generated. */
3631 gcc_assert (ncopies
>= 1);
3633 /* Supportable by target? */
3637 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3639 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3644 if (dump_enabled_p ())
3645 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3646 "conversion not supported by target.\n");
3650 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3651 &code1
, &code2
, &multi_step_cvt
,
3654 /* Binary widening operation can only be supported directly by the
3656 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3660 if (code
!= FLOAT_EXPR
3661 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3662 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3665 rhs_mode
= TYPE_MODE (rhs_type
);
3666 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3667 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3668 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3669 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3672 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3673 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3674 if (cvt_type
== NULL_TREE
)
3677 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3679 if (!supportable_convert_operation (code
, vectype_out
,
3680 cvt_type
, &decl1
, &codecvt1
))
3683 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3684 cvt_type
, &codecvt1
,
3685 &codecvt2
, &multi_step_cvt
,
3689 gcc_assert (multi_step_cvt
== 0);
3691 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3692 vectype_in
, &code1
, &code2
,
3693 &multi_step_cvt
, &interm_types
))
3697 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3700 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3701 codecvt2
= ERROR_MARK
;
3705 interm_types
.safe_push (cvt_type
);
3706 cvt_type
= NULL_TREE
;
3711 gcc_assert (op_type
== unary_op
);
3712 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3713 &code1
, &multi_step_cvt
,
3717 if (code
!= FIX_TRUNC_EXPR
3718 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3719 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3722 rhs_mode
= TYPE_MODE (rhs_type
);
3724 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3725 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3726 if (cvt_type
== NULL_TREE
)
3728 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3731 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3732 &code1
, &multi_step_cvt
,
3741 if (!vec_stmt
) /* transformation not required. */
3743 if (dump_enabled_p ())
3744 dump_printf_loc (MSG_NOTE
, vect_location
,
3745 "=== vectorizable_conversion ===\n");
3746 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3748 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3749 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3751 else if (modifier
== NARROW
)
3753 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3754 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3758 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3759 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3761 interm_types
.release ();
3766 if (dump_enabled_p ())
3767 dump_printf_loc (MSG_NOTE
, vect_location
,
3768 "transform conversion. ncopies = %d.\n", ncopies
);
3770 if (op_type
== binary_op
)
3772 if (CONSTANT_CLASS_P (op0
))
3773 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3774 else if (CONSTANT_CLASS_P (op1
))
3775 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3778 /* In case of multi-step conversion, we first generate conversion operations
3779 to the intermediate types, and then from that types to the final one.
3780 We create vector destinations for the intermediate type (TYPES) received
3781 from supportable_*_operation, and store them in the correct order
3782 for future use in vect_create_vectorized_*_stmts (). */
3783 vec_dsts
.create (multi_step_cvt
+ 1);
3784 vec_dest
= vect_create_destination_var (scalar_dest
,
3785 (cvt_type
&& modifier
== WIDEN
)
3786 ? cvt_type
: vectype_out
);
3787 vec_dsts
.quick_push (vec_dest
);
3791 for (i
= interm_types
.length () - 1;
3792 interm_types
.iterate (i
, &intermediate_type
); i
--)
3794 vec_dest
= vect_create_destination_var (scalar_dest
,
3796 vec_dsts
.quick_push (vec_dest
);
3801 vec_dest
= vect_create_destination_var (scalar_dest
,
3803 ? vectype_out
: cvt_type
);
3807 if (modifier
== WIDEN
)
3809 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3810 if (op_type
== binary_op
)
3811 vec_oprnds1
.create (1);
3813 else if (modifier
== NARROW
)
3814 vec_oprnds0
.create (
3815 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3817 else if (code
== WIDEN_LSHIFT_EXPR
)
3818 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3821 prev_stmt_info
= NULL
;
3825 for (j
= 0; j
< ncopies
; j
++)
3828 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3831 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3833 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3835 /* Arguments are ready, create the new vector stmt. */
3836 if (code1
== CALL_EXPR
)
3838 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3839 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3840 gimple_call_set_lhs (new_stmt
, new_temp
);
3844 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3845 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
3846 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3847 gimple_assign_set_lhs (new_stmt
, new_temp
);
3850 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3852 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3856 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3858 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3859 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3864 /* In case the vectorization factor (VF) is bigger than the number
3865 of elements that we can fit in a vectype (nunits), we have to
3866 generate more than one vector stmt - i.e - we need to "unroll"
3867 the vector stmt by a factor VF/nunits. */
3868 for (j
= 0; j
< ncopies
; j
++)
3875 if (code
== WIDEN_LSHIFT_EXPR
)
3880 /* Store vec_oprnd1 for every vector stmt to be created
3881 for SLP_NODE. We check during the analysis that all
3882 the shift arguments are the same. */
3883 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3884 vec_oprnds1
.quick_push (vec_oprnd1
);
3886 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3890 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
3891 &vec_oprnds1
, slp_node
, -1);
3895 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3896 vec_oprnds0
.quick_push (vec_oprnd0
);
3897 if (op_type
== binary_op
)
3899 if (code
== WIDEN_LSHIFT_EXPR
)
3902 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
3904 vec_oprnds1
.quick_push (vec_oprnd1
);
3910 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3911 vec_oprnds0
.truncate (0);
3912 vec_oprnds0
.quick_push (vec_oprnd0
);
3913 if (op_type
== binary_op
)
3915 if (code
== WIDEN_LSHIFT_EXPR
)
3918 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
3920 vec_oprnds1
.truncate (0);
3921 vec_oprnds1
.quick_push (vec_oprnd1
);
3925 /* Arguments are ready. Create the new vector stmts. */
3926 for (i
= multi_step_cvt
; i
>= 0; i
--)
3928 tree this_dest
= vec_dsts
[i
];
3929 enum tree_code c1
= code1
, c2
= code2
;
3930 if (i
== 0 && codecvt2
!= ERROR_MARK
)
3935 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
3937 stmt
, this_dest
, gsi
,
3938 c1
, c2
, decl1
, decl2
,
3942 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3946 if (codecvt1
== CALL_EXPR
)
3948 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3949 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3950 gimple_call_set_lhs (new_stmt
, new_temp
);
3954 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3955 new_temp
= make_ssa_name (vec_dest
);
3956 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
3960 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3963 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
3966 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3968 if (!prev_stmt_info
)
3969 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3971 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3972 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3976 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3980 /* In case the vectorization factor (VF) is bigger than the number
3981 of elements that we can fit in a vectype (nunits), we have to
3982 generate more than one vector stmt - i.e - we need to "unroll"
3983 the vector stmt by a factor VF/nunits. */
3984 for (j
= 0; j
< ncopies
; j
++)
3988 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3992 vec_oprnds0
.truncate (0);
3993 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
3994 vect_pow2 (multi_step_cvt
) - 1);
3997 /* Arguments are ready. Create the new vector stmts. */
3999 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4001 if (codecvt1
== CALL_EXPR
)
4003 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4004 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4005 gimple_call_set_lhs (new_stmt
, new_temp
);
4009 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4010 new_temp
= make_ssa_name (vec_dest
);
4011 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4015 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4016 vec_oprnds0
[i
] = new_temp
;
4019 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4020 stmt
, vec_dsts
, gsi
,
4025 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4029 vec_oprnds0
.release ();
4030 vec_oprnds1
.release ();
4031 vec_dsts
.release ();
4032 interm_types
.release ();
4038 /* Function vectorizable_assignment.
4040 Check if STMT performs an assignment (copy) that can be vectorized.
4041 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4042 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4043 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): lossy extraction -- original lines are split across
   physical lines, original line numbers are fused into the text, and
   several lines (return statements, braces) are missing.  Code is
   reproduced verbatim; only comments were added.  */
4046 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
4047 gimple
*vec_stmt
, slp_tree slp_node
)
4052 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4053 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4054 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4058 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4059 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4062 vec
<tree
> vec_oprnds
= vNULL
;
4064 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4065 gimple new_stmt
= NULL
;
4066 stmt_vec_info prev_stmt_info
= NULL
;
4067 enum tree_code code
;
4070 /* Multiple types in SLP are handled by creating the appropriate number of
4071 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4073 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4076 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4078 gcc_assert (ncopies
>= 1);
4080 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4083 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4086 /* Is vectorizable assignment? */
4087 if (!is_gimple_assign (stmt
))
4090 scalar_dest
= gimple_assign_lhs (stmt
);
4091 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4094 code
= gimple_assign_rhs_code (stmt
);
4095 if (gimple_assign_single_p (stmt
)
4096 || code
== PAREN_EXPR
4097 || CONVERT_EXPR_CODE_P (code
))
4098 op
= gimple_assign_rhs1 (stmt
);
4102 if (code
== VIEW_CONVERT_EXPR
)
4103 op
= TREE_OPERAND (op
, 0);
4105 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
4106 &def_stmt
, &def
, &dt
[0], &vectype_in
))
4108 if (dump_enabled_p ())
4109 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4110 "use not simple.\n");
4114 /* We can handle NOP_EXPR conversions that do not change the number
4115 of elements or the vector size. */
4116 if ((CONVERT_EXPR_CODE_P (code
)
4117 || code
== VIEW_CONVERT_EXPR
)
4119 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4120 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4121 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4124 /* We do not handle bit-precision changes. */
4125 if ((CONVERT_EXPR_CODE_P (code
)
4126 || code
== VIEW_CONVERT_EXPR
)
4127 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4128 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4129 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4130 || ((TYPE_PRECISION (TREE_TYPE (op
))
4131 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4132 /* But a conversion that does not change the bit-pattern is ok. */
4133 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4134 > TYPE_PRECISION (TREE_TYPE (op
)))
4135 && TYPE_UNSIGNED (TREE_TYPE (op
))))
4137 if (dump_enabled_p ())
4138 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4139 "type conversion to/from bit-precision "
/* Analysis-only phase: record the stmt kind and its cost, no code
   emitted until VEC_STMT is supplied.  */
4144 if (!vec_stmt
) /* transformation not required. */
4146 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4147 if (dump_enabled_p ())
4148 dump_printf_loc (MSG_NOTE
, vect_location
,
4149 "=== vectorizable_assignment ===\n");
4150 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4155 if (dump_enabled_p ())
4156 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
/* Transform phase: one copy (possibly through a VIEW_CONVERT_EXPR)
   per vector operand, per ncopies.  */
4159 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4162 for (j
= 0; j
< ncopies
; j
++)
4166 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4168 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4170 /* Arguments are ready. create the new vector stmt. */
4171 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4173 if (CONVERT_EXPR_CODE_P (code
)
4174 || code
== VIEW_CONVERT_EXPR
)
4175 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4176 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4177 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4178 gimple_assign_set_lhs (new_stmt
, new_temp
);
4179 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4181 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4188 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4190 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4192 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4195 vec_oprnds
.release ();
4200 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4201 either as shift by a scalar or by a vector. */
/* NOTE(review): lossy extraction -- lines split and several dropped
   (the trailing return statements are missing).  Reproduced verbatim;
   only comments added.  Queries the scalar-shift optab first and falls
   back to the vector-shift optab, then checks the insn code.  */
4204 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4207 machine_mode vec_mode
;
4212 vectype
= get_vectype_for_scalar_type (scalar_type
);
4216 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4218 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4220 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4222 || (optab_handler (optab
, TYPE_MODE (vectype
))
4223 == CODE_FOR_nothing
))
4227 vec_mode
= TYPE_MODE (vectype
);
4228 icode
= (int) optab_handler (optab
, vec_mode
);
4229 if (icode
== CODE_FOR_nothing
)
4236 /* Function vectorizable_shift.
4238 Check if STMT performs a shift operation that can be vectorized.
4239 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4240 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4241 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): lossy extraction -- original lines split across
   physical lines, original line numbers fused in, and many lines
   (returns, braces, 'static bool' header) missing.  Reproduced
   verbatim; only comments were added.  */
4244 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
4245 gimple
*vec_stmt
, slp_tree slp_node
)
4249 tree op0
, op1
= NULL
;
4250 tree vec_oprnd1
= NULL_TREE
;
4251 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4253 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4254 enum tree_code code
;
4255 machine_mode vec_mode
;
4259 machine_mode optab_op2_mode
;
4262 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4263 gimple new_stmt
= NULL
;
4264 stmt_vec_info prev_stmt_info
;
4271 vec
<tree
> vec_oprnds0
= vNULL
;
4272 vec
<tree
> vec_oprnds1
= vNULL
;
4275 bool scalar_shift_arg
= true;
4276 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4279 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4282 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4285 /* Is STMT a vectorizable binary/unary operation? */
4286 if (!is_gimple_assign (stmt
))
4289 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4292 code
= gimple_assign_rhs_code (stmt
);
4294 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4295 || code
== RROTATE_EXPR
))
4298 scalar_dest
= gimple_assign_lhs (stmt
);
4299 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4300 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4301 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4303 if (dump_enabled_p ())
4304 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4305 "bit-precision shifts not supported.\n");
4309 op0
= gimple_assign_rhs1 (stmt
);
4310 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4311 &def_stmt
, &def
, &dt
[0], &vectype
))
4313 if (dump_enabled_p ())
4314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4315 "use not simple.\n");
4318 /* If op0 is an external or constant def use a vector type with
4319 the same size as the output vector type. */
4321 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4323 gcc_assert (vectype
);
4326 if (dump_enabled_p ())
4327 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4328 "no vectype for scalar type\n");
4332 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4333 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4334 if (nunits_out
!= nunits_in
)
4337 op1
= gimple_assign_rhs2 (stmt
);
4338 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4339 &def
, &dt
[1], &op1_vectype
))
4341 if (dump_enabled_p ())
4342 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4343 "use not simple.\n");
4348 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4352 /* Multiple types in SLP are handled by creating the appropriate number of
4353 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4355 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4358 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4360 gcc_assert (ncopies
>= 1);
4362 /* Determine whether the shift amount is a vector, or scalar. If the
4363 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4365 if (dt
[1] == vect_internal_def
&& !slp_node
)
4366 scalar_shift_arg
= false;
4367 else if (dt
[1] == vect_constant_def
4368 || dt
[1] == vect_external_def
4369 || dt
[1] == vect_internal_def
)
4371 /* In SLP, need to check whether the shift count is the same,
4372 in loops if it is a constant or invariant, it is always
4376 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4379 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4380 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4381 scalar_shift_arg
= false;
4386 if (dump_enabled_p ())
4387 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4388 "operand mode requires invariant argument.\n");
4392 /* Vector shifted by vector. */
4393 if (!scalar_shift_arg
)
4395 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4396 if (dump_enabled_p ())
4397 dump_printf_loc (MSG_NOTE
, vect_location
,
4398 "vector/vector shift/rotate found.\n");
4401 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4402 if (op1_vectype
== NULL_TREE
4403 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4405 if (dump_enabled_p ())
4406 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4407 "unusable type for last operand in"
4408 " vector/vector shift/rotate.\n");
4412 /* See if the machine has a vector shifted by scalar insn and if not
4413 then see if it has a vector shifted by vector insn. */
4416 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4418 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4420 if (dump_enabled_p ())
4421 dump_printf_loc (MSG_NOTE
, vect_location
,
4422 "vector/scalar shift/rotate found.\n");
4426 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4428 && (optab_handler (optab
, TYPE_MODE (vectype
))
4429 != CODE_FOR_nothing
))
4431 scalar_shift_arg
= false;
4433 if (dump_enabled_p ())
4434 dump_printf_loc (MSG_NOTE
, vect_location
,
4435 "vector/vector shift/rotate found.\n");
4437 /* Unlike the other binary operators, shifts/rotates have
4438 the rhs being int, instead of the same type as the lhs,
4439 so make sure the scalar is the right type if we are
4440 dealing with vectors of long long/long/short/char. */
4441 if (dt
[1] == vect_constant_def
)
4442 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4443 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4447 && TYPE_MODE (TREE_TYPE (vectype
))
4448 != TYPE_MODE (TREE_TYPE (op1
)))
4450 if (dump_enabled_p ())
4451 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4452 "unusable type for last operand in"
4453 " vector/vector shift/rotate.\n");
4456 if (vec_stmt
&& !slp_node
)
4458 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4459 op1
= vect_init_vector (stmt
, op1
,
4460 TREE_TYPE (vectype
), NULL
);
4467 /* Supportable by target? */
4470 if (dump_enabled_p ())
4471 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4475 vec_mode
= TYPE_MODE (vectype
);
4476 icode
= (int) optab_handler (optab
, vec_mode
);
4477 if (icode
== CODE_FOR_nothing
)
4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4481 "op not supported by target.\n");
4482 /* Check only during analysis. */
4483 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4484 || (vf
< vect_min_worthwhile_factor (code
)
4487 if (dump_enabled_p ())
4488 dump_printf_loc (MSG_NOTE
, vect_location
,
4489 "proceeding using word mode.\n");
4492 /* Worthwhile without SIMD support? Check only during analysis. */
4493 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4494 && vf
< vect_min_worthwhile_factor (code
)
4497 if (dump_enabled_p ())
4498 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4499 "not worthwhile without SIMD support.\n");
/* Analysis-only phase: record stmt kind and cost, then return.  */
4503 if (!vec_stmt
) /* transformation not required. */
4505 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4506 if (dump_enabled_p ())
4507 dump_printf_loc (MSG_NOTE
, vect_location
,
4508 "=== vectorizable_shift ===\n");
4509 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4515 if (dump_enabled_p ())
4516 dump_printf_loc (MSG_NOTE
, vect_location
,
4517 "transform binary/unary operation.\n");
4520 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4522 prev_stmt_info
= NULL
;
4523 for (j
= 0; j
< ncopies
; j
++)
4528 if (scalar_shift_arg
)
4530 /* Vector shl and shr insn patterns can be defined with scalar
4531 operand 2 (shift operand). In this case, use constant or loop
4532 invariant op1 directly, without extending it to vector mode
4534 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4535 if (!VECTOR_MODE_P (optab_op2_mode
))
4537 if (dump_enabled_p ())
4538 dump_printf_loc (MSG_NOTE
, vect_location
,
4539 "operand 1 using scalar mode.\n");
4541 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4542 vec_oprnds1
.quick_push (vec_oprnd1
);
4545 /* Store vec_oprnd1 for every vector stmt to be created
4546 for SLP_NODE. We check during the analysis that all
4547 the shift arguments are the same.
4548 TODO: Allow different constants for different vector
4549 stmts generated for an SLP instance. */
4550 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4551 vec_oprnds1
.quick_push (vec_oprnd1
);
4556 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4557 (a special case for certain kind of vector shifts); otherwise,
4558 operand 1 should be of a vector type (the usual case). */
4560 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4563 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4567 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4569 /* Arguments are ready. Create the new vector stmt. */
4570 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4572 vop1
= vec_oprnds1
[i
];
4573 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4574 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4575 gimple_assign_set_lhs (new_stmt
, new_temp
);
4576 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4578 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4585 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4587 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4588 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4591 vec_oprnds0
.release ();
4592 vec_oprnds1
.release ();
4598 /* Function vectorizable_operation.
4600 Check if STMT performs a binary, unary or ternary operation that can
4602 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4603 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4604 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4607 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
4608 gimple
*vec_stmt
, slp_tree slp_node
)
4612 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4613 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4615 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4616 enum tree_code code
;
4617 machine_mode vec_mode
;
4624 enum vect_def_type dt
[3]
4625 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4626 gimple new_stmt
= NULL
;
4627 stmt_vec_info prev_stmt_info
;
4633 vec
<tree
> vec_oprnds0
= vNULL
;
4634 vec
<tree
> vec_oprnds1
= vNULL
;
4635 vec
<tree
> vec_oprnds2
= vNULL
;
4636 tree vop0
, vop1
, vop2
;
4637 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4640 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4643 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4646 /* Is STMT a vectorizable binary/unary operation? */
4647 if (!is_gimple_assign (stmt
))
4650 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4653 code
= gimple_assign_rhs_code (stmt
);
4655 /* For pointer addition, we should use the normal plus for
4656 the vector addition. */
4657 if (code
== POINTER_PLUS_EXPR
)
4660 /* Support only unary or binary operations. */
4661 op_type
= TREE_CODE_LENGTH (code
);
4662 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4664 if (dump_enabled_p ())
4665 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4666 "num. args = %d (not unary/binary/ternary op).\n",
4671 scalar_dest
= gimple_assign_lhs (stmt
);
4672 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4674 /* Most operations cannot handle bit-precision types without extra
4676 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4677 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4678 /* Exception are bitwise binary operations. */
4679 && code
!= BIT_IOR_EXPR
4680 && code
!= BIT_XOR_EXPR
4681 && code
!= BIT_AND_EXPR
)
4683 if (dump_enabled_p ())
4684 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4685 "bit-precision arithmetic not supported.\n");
4689 op0
= gimple_assign_rhs1 (stmt
);
4690 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4691 &def_stmt
, &def
, &dt
[0], &vectype
))
4693 if (dump_enabled_p ())
4694 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4695 "use not simple.\n");
4698 /* If op0 is an external or constant def use a vector type with
4699 the same size as the output vector type. */
4701 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4703 gcc_assert (vectype
);
4706 if (dump_enabled_p ())
4708 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4709 "no vectype for scalar type ");
4710 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4712 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4718 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4719 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4720 if (nunits_out
!= nunits_in
)
4723 if (op_type
== binary_op
|| op_type
== ternary_op
)
4725 op1
= gimple_assign_rhs2 (stmt
);
4726 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4731 "use not simple.\n");
4735 if (op_type
== ternary_op
)
4737 op2
= gimple_assign_rhs3 (stmt
);
4738 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4741 if (dump_enabled_p ())
4742 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4743 "use not simple.\n");
4749 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4753 /* Multiple types in SLP are handled by creating the appropriate number of
4754 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4756 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4759 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4761 gcc_assert (ncopies
>= 1);
4763 /* Shifts are handled in vectorizable_shift (). */
4764 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4765 || code
== RROTATE_EXPR
)
4768 /* Supportable by target? */
4770 vec_mode
= TYPE_MODE (vectype
);
4771 if (code
== MULT_HIGHPART_EXPR
)
4773 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
4774 icode
= LAST_INSN_CODE
;
4776 icode
= CODE_FOR_nothing
;
4780 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4783 if (dump_enabled_p ())
4784 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4788 icode
= (int) optab_handler (optab
, vec_mode
);
4791 if (icode
== CODE_FOR_nothing
)
4793 if (dump_enabled_p ())
4794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4795 "op not supported by target.\n");
4796 /* Check only during analysis. */
4797 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4798 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4800 if (dump_enabled_p ())
4801 dump_printf_loc (MSG_NOTE
, vect_location
,
4802 "proceeding using word mode.\n");
4805 /* Worthwhile without SIMD support? Check only during analysis. */
4806 if (!VECTOR_MODE_P (vec_mode
)
4808 && vf
< vect_min_worthwhile_factor (code
))
4810 if (dump_enabled_p ())
4811 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4812 "not worthwhile without SIMD support.\n");
4816 if (!vec_stmt
) /* transformation not required. */
4818 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4819 if (dump_enabled_p ())
4820 dump_printf_loc (MSG_NOTE
, vect_location
,
4821 "=== vectorizable_operation ===\n");
4822 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4828 if (dump_enabled_p ())
4829 dump_printf_loc (MSG_NOTE
, vect_location
,
4830 "transform binary/unary operation.\n");
4833 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4835 /* In case the vectorization factor (VF) is bigger than the number
4836 of elements that we can fit in a vectype (nunits), we have to generate
4837 more than one vector stmt - i.e - we need to "unroll" the
4838 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4839 from one copy of the vector stmt to the next, in the field
4840 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4841 stages to find the correct vector defs to be used when vectorizing
4842 stmts that use the defs of the current stmt. The example below
4843 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4844 we need to create 4 vectorized stmts):
4846 before vectorization:
4847 RELATED_STMT VEC_STMT
4851 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4853 RELATED_STMT VEC_STMT
4854 VS1_0: vx0 = memref0 VS1_1 -
4855 VS1_1: vx1 = memref1 VS1_2 -
4856 VS1_2: vx2 = memref2 VS1_3 -
4857 VS1_3: vx3 = memref3 - -
4858 S1: x = load - VS1_0
4861 step2: vectorize stmt S2 (done here):
4862 To vectorize stmt S2 we first need to find the relevant vector
4863 def for the first operand 'x'. This is, as usual, obtained from
4864 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4865 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4866 relevant vector def 'vx0'. Having found 'vx0' we can generate
4867 the vector stmt VS2_0, and as usual, record it in the
4868 STMT_VINFO_VEC_STMT of stmt S2.
4869 When creating the second copy (VS2_1), we obtain the relevant vector
4870 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4871 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4872 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4873 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4874 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4875 chain of stmts and pointers:
4876 RELATED_STMT VEC_STMT
4877 VS1_0: vx0 = memref0 VS1_1 -
4878 VS1_1: vx1 = memref1 VS1_2 -
4879 VS1_2: vx2 = memref2 VS1_3 -
4880 VS1_3: vx3 = memref3 - -
4881 S1: x = load - VS1_0
4882 VS2_0: vz0 = vx0 + v1 VS2_1 -
4883 VS2_1: vz1 = vx1 + v1 VS2_2 -
4884 VS2_2: vz2 = vx2 + v1 VS2_3 -
4885 VS2_3: vz3 = vx3 + v1 - -
4886 S2: z = x + 1 - VS2_0 */
4888 prev_stmt_info
= NULL
;
4889 for (j
= 0; j
< ncopies
; j
++)
4894 if (op_type
== binary_op
|| op_type
== ternary_op
)
4895 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4898 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4900 if (op_type
== ternary_op
)
4902 vec_oprnds2
.create (1);
4903 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
4910 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4911 if (op_type
== ternary_op
)
4913 tree vec_oprnd
= vec_oprnds2
.pop ();
4914 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
4919 /* Arguments are ready. Create the new vector stmt. */
4920 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4922 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
4923 ? vec_oprnds1
[i
] : NULL_TREE
);
4924 vop2
= ((op_type
== ternary_op
)
4925 ? vec_oprnds2
[i
] : NULL_TREE
);
4926 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
4927 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4928 gimple_assign_set_lhs (new_stmt
, new_temp
);
4929 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4931 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4938 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4940 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4941 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4944 vec_oprnds0
.release ();
4945 vec_oprnds1
.release ();
4946 vec_oprnds2
.release ();
4951 /* A helper function to ensure data reference DR's base alignment
4955 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
4960 if (((dataref_aux
*)dr
->aux
)->base_misaligned
)
4962 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4963 tree base_decl
= ((dataref_aux
*)dr
->aux
)->base_decl
;
4965 if (decl_in_symtab_p (base_decl
))
4966 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
4969 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
4970 DECL_USER_ALIGN (base_decl
) = 1;
4972 ((dataref_aux
*)dr
->aux
)->base_misaligned
= false;
4977 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4978 reversal of the vector elements. If that is impossible to do,
4982 perm_mask_for_reverse (tree vectype
)
4987 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4988 sel
= XALLOCAVEC (unsigned char, nunits
);
4990 for (i
= 0; i
< nunits
; ++i
)
4991 sel
[i
] = nunits
- 1 - i
;
4993 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
4995 return vect_gen_perm_mask_checked (vectype
, sel
);
4998 /* Function vectorizable_store.
5000 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5002 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5003 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5004 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5007 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5013 tree vec_oprnd
= NULL_TREE
;
5014 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5015 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5016 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5018 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5019 struct loop
*loop
= NULL
;
5020 machine_mode vec_mode
;
5022 enum dr_alignment_support alignment_support_scheme
;
5025 enum vect_def_type dt
;
5026 stmt_vec_info prev_stmt_info
= NULL
;
5027 tree dataref_ptr
= NULL_TREE
;
5028 tree dataref_offset
= NULL_TREE
;
5029 gimple ptr_incr
= NULL
;
5030 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5033 gimple next_stmt
, first_stmt
= NULL
;
5034 bool grouped_store
= false;
5035 bool store_lanes_p
= false;
5036 unsigned int group_size
, i
;
5037 vec
<tree
> dr_chain
= vNULL
;
5038 vec
<tree
> oprnds
= vNULL
;
5039 vec
<tree
> result_chain
= vNULL
;
5041 bool negative
= false;
5042 tree offset
= NULL_TREE
;
5043 vec
<tree
> vec_oprnds
= vNULL
;
5044 bool slp
= (slp_node
!= NULL
);
5045 unsigned int vec_num
;
5046 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5050 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5052 /* Multiple types in SLP are handled by creating the appropriate number of
5053 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5055 if (slp
|| PURE_SLP_STMT (stmt_info
))
5058 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5060 gcc_assert (ncopies
>= 1);
5062 /* FORNOW. This restriction should be relaxed. */
5063 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5065 if (dump_enabled_p ())
5066 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5067 "multiple types in nested loop.\n");
5071 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5074 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5077 /* Is vectorizable store? */
5079 if (!is_gimple_assign (stmt
))
5082 scalar_dest
= gimple_assign_lhs (stmt
);
5083 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5084 && is_pattern_stmt_p (stmt_info
))
5085 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5086 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5087 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5088 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5089 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5090 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5091 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5092 && TREE_CODE (scalar_dest
) != MEM_REF
)
5095 gcc_assert (gimple_assign_single_p (stmt
));
5096 op
= gimple_assign_rhs1 (stmt
);
5097 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5100 if (dump_enabled_p ())
5101 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5102 "use not simple.\n");
5106 elem_type
= TREE_TYPE (vectype
);
5107 vec_mode
= TYPE_MODE (vectype
);
5109 /* FORNOW. In some cases can vectorize even if data-type not supported
5110 (e.g. - array initialization with 0). */
5111 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5114 if (!STMT_VINFO_DATA_REF (stmt_info
))
5117 if (!STMT_VINFO_STRIDED_P (stmt_info
))
5120 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5121 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5122 size_zero_node
) < 0;
5123 if (negative
&& ncopies
> 1)
5125 if (dump_enabled_p ())
5126 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5127 "multiple types with negative step.\n");
5132 gcc_assert (!grouped_store
);
5133 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5134 if (alignment_support_scheme
!= dr_aligned
5135 && alignment_support_scheme
!= dr_unaligned_supported
)
5137 if (dump_enabled_p ())
5138 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5139 "negative step but alignment required.\n");
5142 if (dt
!= vect_constant_def
5143 && dt
!= vect_external_def
5144 && !perm_mask_for_reverse (vectype
))
5146 if (dump_enabled_p ())
5147 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5148 "negative step and reversing not supported.\n");
5154 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5156 grouped_store
= true;
5157 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5158 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5160 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5161 if (vect_store_lanes_supported (vectype
, group_size
))
5162 store_lanes_p
= true;
5163 else if (!vect_grouped_store_supported (vectype
, group_size
))
5167 if (first_stmt
== stmt
)
5169 /* STMT is the leader of the group. Check the operands of all the
5170 stmts of the group. */
5171 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5174 gcc_assert (gimple_assign_single_p (next_stmt
));
5175 op
= gimple_assign_rhs1 (next_stmt
);
5176 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
5177 &def_stmt
, &def
, &dt
))
5179 if (dump_enabled_p ())
5180 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5181 "use not simple.\n");
5184 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5189 if (!vec_stmt
) /* transformation not required. */
5191 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5192 /* The SLP costs are calculated during SLP analysis. */
5193 if (!PURE_SLP_STMT (stmt_info
))
5194 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5201 ensure_base_align (stmt_info
, dr
);
5205 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5206 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5208 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5211 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5213 /* We vectorize all the stmts of the interleaving group when we
5214 reach the last stmt in the group. */
5215 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5216 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5225 grouped_store
= false;
5226 /* VEC_NUM is the number of vect stmts to be created for this
5228 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5229 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5230 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5231 op
= gimple_assign_rhs1 (first_stmt
);
5234 /* VEC_NUM is the number of vect stmts to be created for this
5236 vec_num
= group_size
;
5242 group_size
= vec_num
= 1;
5245 if (dump_enabled_p ())
5246 dump_printf_loc (MSG_NOTE
, vect_location
,
5247 "transform store. ncopies = %d\n", ncopies
);
5249 if (STMT_VINFO_STRIDED_P (stmt_info
))
5251 gimple_stmt_iterator incr_gsi
;
5257 gimple_seq stmts
= NULL
;
5258 tree stride_base
, stride_step
, alias_off
;
5261 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5264 = fold_build_pointer_plus
5265 (unshare_expr (DR_BASE_ADDRESS (dr
)),
5266 size_binop (PLUS_EXPR
,
5267 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
5268 convert_to_ptrofftype (DR_INIT(dr
))));
5269 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
5271 /* For a store with loop-invariant (but other than power-of-2)
5272 stride (i.e. not a grouped access) like so:
5274 for (i = 0; i < n; i += stride)
5277 we generate a new induction variable and new stores from
5278 the components of the (vectorized) rhs:
5280 for (j = 0; ; j += VF*stride)
5285 array[j + stride] = tmp2;
5289 ivstep
= stride_step
;
5290 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5291 build_int_cst (TREE_TYPE (ivstep
),
5294 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5296 create_iv (stride_base
, ivstep
, NULL
,
5297 loop
, &incr_gsi
, insert_after
,
5299 incr
= gsi_stmt (incr_gsi
);
5300 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
5302 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5304 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5306 prev_stmt_info
= NULL
;
5307 running_off
= offvar
;
5308 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
5309 for (j
= 0; j
< ncopies
; j
++)
5311 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5312 and first_stmt == stmt. */
5314 vec_oprnd
= vect_get_vec_def_for_operand (op
, first_stmt
, NULL
);
5316 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
5318 for (i
= 0; i
< nunits
; i
++)
5320 tree newref
, newoff
;
5321 gimple incr
, assign
;
5322 tree size
= TYPE_SIZE (elem_type
);
5323 /* Extract the i'th component. */
5324 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
, bitsize_int (i
),
5326 tree elem
= fold_build3 (BIT_FIELD_REF
, elem_type
, vec_oprnd
,
5329 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
5333 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
5334 running_off
, alias_off
);
5336 /* And store it to *running_off. */
5337 assign
= gimple_build_assign (newref
, elem
);
5338 vect_finish_stmt_generation (stmt
, assign
, gsi
);
5340 newoff
= copy_ssa_name (running_off
, NULL
);
5341 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5342 running_off
, stride_step
);
5343 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5345 running_off
= newoff
;
5346 if (j
== 0 && i
== i
)
5347 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= assign
;
5349 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
5350 prev_stmt_info
= vinfo_for_stmt (assign
);
5356 dr_chain
.create (group_size
);
5357 oprnds
.create (group_size
);
5359 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5360 gcc_assert (alignment_support_scheme
);
5361 /* Targets with store-lane instructions must not require explicit
5363 gcc_assert (!store_lanes_p
5364 || alignment_support_scheme
== dr_aligned
5365 || alignment_support_scheme
== dr_unaligned_supported
);
5368 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5371 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5373 aggr_type
= vectype
;
5375 /* In case the vectorization factor (VF) is bigger than the number
5376 of elements that we can fit in a vectype (nunits), we have to generate
5377 more than one vector stmt - i.e - we need to "unroll" the
5378 vector stmt by a factor VF/nunits. For more details see documentation in
5379 vect_get_vec_def_for_copy_stmt. */
5381 /* In case of interleaving (non-unit grouped access):
5388 We create vectorized stores starting from base address (the access of the
5389 first stmt in the chain (S2 in the above example), when the last store stmt
5390 of the chain (S4) is reached:
5393 VS2: &base + vec_size*1 = vx0
5394 VS3: &base + vec_size*2 = vx1
5395 VS4: &base + vec_size*3 = vx3
5397 Then permutation statements are generated:
5399 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5400 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5403 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5404 (the order of the data-refs in the output of vect_permute_store_chain
5405 corresponds to the order of scalar stmts in the interleaving chain - see
5406 the documentation of vect_permute_store_chain()).
5408 In case of both multiple types and interleaving, above vector stores and
5409 permutation stmts are created for every copy. The result vector stmts are
5410 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5411 STMT_VINFO_RELATED_STMT for the next copies.
5414 prev_stmt_info
= NULL
;
5415 for (j
= 0; j
< ncopies
; j
++)
5423 /* Get vectorized arguments for SLP_NODE. */
5424 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5425 NULL
, slp_node
, -1);
5427 vec_oprnd
= vec_oprnds
[0];
5431 /* For interleaved stores we collect vectorized defs for all the
5432 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5433 used as an input to vect_permute_store_chain(), and OPRNDS as
5434 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5436 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5437 OPRNDS are of size 1. */
5438 next_stmt
= first_stmt
;
5439 for (i
= 0; i
< group_size
; i
++)
5441 /* Since gaps are not supported for interleaved stores,
5442 GROUP_SIZE is the exact number of stmts in the chain.
5443 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5444 there is no interleaving, GROUP_SIZE is 1, and only one
5445 iteration of the loop will be executed. */
5446 gcc_assert (next_stmt
5447 && gimple_assign_single_p (next_stmt
));
5448 op
= gimple_assign_rhs1 (next_stmt
);
5450 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5452 dr_chain
.quick_push (vec_oprnd
);
5453 oprnds
.quick_push (vec_oprnd
);
5454 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5458 /* We should have catched mismatched types earlier. */
5459 gcc_assert (useless_type_conversion_p (vectype
,
5460 TREE_TYPE (vec_oprnd
)));
5461 bool simd_lane_access_p
5462 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5463 if (simd_lane_access_p
5464 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5465 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5466 && integer_zerop (DR_OFFSET (first_dr
))
5467 && integer_zerop (DR_INIT (first_dr
))
5468 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5469 get_alias_set (DR_REF (first_dr
))))
5471 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5472 dataref_offset
= build_int_cst (reference_alias_ptr_type
5473 (DR_REF (first_dr
)), 0);
5478 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5479 simd_lane_access_p
? loop
: NULL
,
5480 offset
, &dummy
, gsi
, &ptr_incr
,
5481 simd_lane_access_p
, &inv_p
);
5482 gcc_assert (bb_vinfo
|| !inv_p
);
5486 /* For interleaved stores we created vectorized defs for all the
5487 defs stored in OPRNDS in the previous iteration (previous copy).
5488 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5489 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5491 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5492 OPRNDS are of size 1. */
5493 for (i
= 0; i
< group_size
; i
++)
5496 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5498 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5499 dr_chain
[i
] = vec_oprnd
;
5500 oprnds
[i
] = vec_oprnd
;
5504 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5505 TYPE_SIZE_UNIT (aggr_type
));
5507 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5508 TYPE_SIZE_UNIT (aggr_type
));
5515 /* Combine all the vectors into an array. */
5516 vec_array
= create_vector_array (vectype
, vec_num
);
5517 for (i
= 0; i
< vec_num
; i
++)
5519 vec_oprnd
= dr_chain
[i
];
5520 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5524 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5525 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5526 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5527 gimple_call_set_lhs (new_stmt
, data_ref
);
5528 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5536 result_chain
.create (group_size
);
5538 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5542 next_stmt
= first_stmt
;
5543 for (i
= 0; i
< vec_num
; i
++)
5545 unsigned align
, misalign
;
5548 /* Bump the vector pointer. */
5549 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5553 vec_oprnd
= vec_oprnds
[i
];
5554 else if (grouped_store
)
5555 /* For grouped stores vectorized defs are interleaved in
5556 vect_permute_store_chain(). */
5557 vec_oprnd
= result_chain
[i
];
5559 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
5562 : build_int_cst (reference_alias_ptr_type
5563 (DR_REF (first_dr
)), 0));
5564 align
= TYPE_ALIGN_UNIT (vectype
);
5565 if (aligned_access_p (first_dr
))
5567 else if (DR_MISALIGNMENT (first_dr
) == -1)
5569 TREE_TYPE (data_ref
)
5570 = build_aligned_type (TREE_TYPE (data_ref
),
5571 TYPE_ALIGN (elem_type
));
5572 align
= TYPE_ALIGN_UNIT (elem_type
);
5577 TREE_TYPE (data_ref
)
5578 = build_aligned_type (TREE_TYPE (data_ref
),
5579 TYPE_ALIGN (elem_type
));
5580 misalign
= DR_MISALIGNMENT (first_dr
);
5582 if (dataref_offset
== NULL_TREE
)
5583 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
5587 && dt
!= vect_constant_def
5588 && dt
!= vect_external_def
)
5590 tree perm_mask
= perm_mask_for_reverse (vectype
);
5592 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
5594 tree new_temp
= make_ssa_name (perm_dest
);
5596 /* Generate the permute statement. */
5598 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
5599 vec_oprnd
, perm_mask
);
5600 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5602 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5603 vec_oprnd
= new_temp
;
5606 /* Arguments are ready. Create the new vector stmt. */
5607 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5608 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5613 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5621 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5623 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5624 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5628 dr_chain
.release ();
5630 result_chain
.release ();
5631 vec_oprnds
.release ();
5636 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5637 VECTOR_CST mask. No checks are made that the target platform supports the
5638 mask, so callers may wish to test can_vec_perm_p separately, or use
5639 vect_gen_perm_mask_checked. */
5642 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
5644 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
5647 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5649 mask_elt_type
= lang_hooks
.types
.type_for_mode
5650 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
5651 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
5653 mask_elts
= XALLOCAVEC (tree
, nunits
);
5654 for (i
= nunits
- 1; i
>= 0; i
--)
5655 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
5656 mask_vec
= build_vector (mask_type
, mask_elts
);
5661 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5662 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5665 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
5667 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
5668 return vect_gen_perm_mask_any (vectype
, sel
);
5671 /* Given a vector variable X and Y, that was generated for the scalar
5672 STMT, generate instructions to permute the vector elements of X and Y
5673 using permutation mask MASK_VEC, insert them at *GSI and return the
5674 permuted vector variable. */
5677 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
5678 gimple_stmt_iterator
*gsi
)
5680 tree vectype
= TREE_TYPE (x
);
5681 tree perm_dest
, data_ref
;
5684 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
5685 data_ref
= make_ssa_name (perm_dest
);
5687 /* Generate the permute statement. */
5688 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
5689 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5694 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5695 inserting them on the loops preheader edge. Returns true if we
5696 were successful in doing so (and thus STMT can be moved then),
5697 otherwise returns false. */
5700 hoist_defs_of_uses (gimple stmt
, struct loop
*loop
)
5706 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5708 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5709 if (!gimple_nop_p (def_stmt
)
5710 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5712 /* Make sure we don't need to recurse. While we could do
5713 so in simple cases when there are more complex use webs
5714 we don't have an easy way to preserve stmt order to fulfil
5715 dependencies within them. */
5718 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
5720 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
5722 gimple def_stmt2
= SSA_NAME_DEF_STMT (op2
);
5723 if (!gimple_nop_p (def_stmt2
)
5724 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
5734 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5736 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5737 if (!gimple_nop_p (def_stmt
)
5738 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5740 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
5741 gsi_remove (&gsi
, false);
5742 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
5749 /* vectorizable_load.
5751 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5753 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5754 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5755 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5758 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5759 slp_tree slp_node
, slp_instance slp_node_instance
)
5762 tree vec_dest
= NULL
;
5763 tree data_ref
= NULL
;
5764 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5765 stmt_vec_info prev_stmt_info
;
5766 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5767 struct loop
*loop
= NULL
;
5768 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5769 bool nested_in_vect_loop
= false;
5770 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5771 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5775 gimple new_stmt
= NULL
;
5777 enum dr_alignment_support alignment_support_scheme
;
5778 tree dataref_ptr
= NULL_TREE
;
5779 tree dataref_offset
= NULL_TREE
;
5780 gimple ptr_incr
= NULL
;
5781 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5783 int i
, j
, group_size
= -1, group_gap
;
5784 tree msq
= NULL_TREE
, lsq
;
5785 tree offset
= NULL_TREE
;
5786 tree byte_offset
= NULL_TREE
;
5787 tree realignment_token
= NULL_TREE
;
5789 vec
<tree
> dr_chain
= vNULL
;
5790 bool grouped_load
= false;
5791 bool load_lanes_p
= false;
5794 bool negative
= false;
5795 bool compute_in_loop
= false;
5796 struct loop
*at_loop
;
5798 bool slp
= (slp_node
!= NULL
);
5799 bool slp_perm
= false;
5800 enum tree_code code
;
5801 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5804 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
5805 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
5806 int gather_scale
= 1;
5807 enum vect_def_type gather_dt
= vect_unknown_def_type
;
5811 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5812 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5813 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5818 /* Multiple types in SLP are handled by creating the appropriate number of
5819 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5821 if (slp
|| PURE_SLP_STMT (stmt_info
))
5824 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5826 gcc_assert (ncopies
>= 1);
5828 /* FORNOW. This restriction should be relaxed. */
5829 if (nested_in_vect_loop
&& ncopies
> 1)
5831 if (dump_enabled_p ())
5832 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5833 "multiple types in nested loop.\n");
5837 /* Invalidate assumptions made by dependence analysis when vectorization
5838 on the unrolled body effectively re-orders stmts. */
5840 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5841 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5842 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5846 "cannot perform implicit CSE when unrolling "
5847 "with negative dependence distance\n");
5851 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5854 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5857 /* Is vectorizable load? */
5858 if (!is_gimple_assign (stmt
))
5861 scalar_dest
= gimple_assign_lhs (stmt
);
5862 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5865 code
= gimple_assign_rhs_code (stmt
);
5866 if (code
!= ARRAY_REF
5867 && code
!= BIT_FIELD_REF
5868 && code
!= INDIRECT_REF
5869 && code
!= COMPONENT_REF
5870 && code
!= IMAGPART_EXPR
5871 && code
!= REALPART_EXPR
5873 && TREE_CODE_CLASS (code
) != tcc_declaration
)
5876 if (!STMT_VINFO_DATA_REF (stmt_info
))
5879 elem_type
= TREE_TYPE (vectype
);
5880 mode
= TYPE_MODE (vectype
);
5882 /* FORNOW. In some cases can vectorize even if data-type not supported
5883 (e.g. - data copies). */
5884 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
5886 if (dump_enabled_p ())
5887 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5888 "Aligned load, but unsupported type.\n");
5892 /* Check if the load is a part of an interleaving chain. */
5893 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5895 grouped_load
= true;
5897 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
5899 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5901 /* If this is single-element interleaving with an element distance
5902 that leaves unused vector loads around punt - we at least create
5903 very sub-optimal code in that case (and blow up memory,
5905 if (first_stmt
== stmt
5906 && !GROUP_NEXT_ELEMENT (stmt_info
)
5907 && GROUP_SIZE (stmt_info
) > TYPE_VECTOR_SUBPARTS (vectype
))
5909 if (dump_enabled_p ())
5910 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5911 "single-element interleaving not supported "
5912 "for not adjacent vector loads\n");
5916 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5918 && !PURE_SLP_STMT (stmt_info
)
5919 && !STMT_VINFO_STRIDED_P (stmt_info
))
5921 if (vect_load_lanes_supported (vectype
, group_size
))
5922 load_lanes_p
= true;
5923 else if (!vect_grouped_load_supported (vectype
, group_size
))
5927 /* Invalidate assumptions made by dependence analysis when vectorization
5928 on the unrolled body effectively re-orders stmts. */
5929 if (!PURE_SLP_STMT (stmt_info
)
5930 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5931 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5932 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5934 if (dump_enabled_p ())
5935 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5936 "cannot perform implicit CSE when performing "
5937 "group loads with negative dependence distance\n");
5941 /* Similarly when the stmt is a load that is both part of a SLP
5942 instance and a loop vectorized stmt via the same-dr mechanism
5943 we have to give up. */
5944 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
5945 && (STMT_SLP_TYPE (stmt_info
)
5946 != STMT_SLP_TYPE (vinfo_for_stmt
5947 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
5949 if (dump_enabled_p ())
5950 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5951 "conflicting SLP types for CSEd load\n");
5957 if (STMT_VINFO_GATHER_P (stmt_info
))
5961 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
5962 &gather_off
, &gather_scale
);
5963 gcc_assert (gather_decl
);
5964 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
5965 &def_stmt
, &def
, &gather_dt
,
5966 &gather_off_vectype
))
5968 if (dump_enabled_p ())
5969 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5970 "gather index use not simple.\n");
5974 else if (STMT_VINFO_STRIDED_P (stmt_info
))
5977 && (slp
|| PURE_SLP_STMT (stmt_info
)))
5978 && (group_size
> nunits
5979 || nunits
% group_size
!= 0
5980 /* ??? During analysis phase we are not called with the
5981 slp node/instance we are in so whether we'll end up
5982 with a permutation we don't know. Still we don't
5983 support load permutations. */
5986 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5987 "unhandled strided group load\n");
5993 negative
= tree_int_cst_compare (nested_in_vect_loop
5994 ? STMT_VINFO_DR_STEP (stmt_info
)
5996 size_zero_node
) < 0;
5997 if (negative
&& ncopies
> 1)
5999 if (dump_enabled_p ())
6000 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6001 "multiple types with negative step.\n");
6009 if (dump_enabled_p ())
6010 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6011 "negative step for group load not supported"
6015 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
6016 if (alignment_support_scheme
!= dr_aligned
6017 && alignment_support_scheme
!= dr_unaligned_supported
)
6019 if (dump_enabled_p ())
6020 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6021 "negative step but alignment required.\n");
6024 if (!perm_mask_for_reverse (vectype
))
6026 if (dump_enabled_p ())
6027 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6028 "negative step and reversing not supported."
6035 if (!vec_stmt
) /* transformation not required. */
6037 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6038 /* The SLP costs are calculated during SLP analysis. */
6039 if (!PURE_SLP_STMT (stmt_info
))
6040 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
,
6045 if (dump_enabled_p ())
6046 dump_printf_loc (MSG_NOTE
, vect_location
,
6047 "transform load. ncopies = %d\n", ncopies
);
6051 ensure_base_align (stmt_info
, dr
);
6053 if (STMT_VINFO_GATHER_P (stmt_info
))
6055 tree vec_oprnd0
= NULL_TREE
, op
;
6056 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
6057 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6058 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6059 edge pe
= loop_preheader_edge (loop
);
6062 enum { NARROW
, NONE
, WIDEN
} modifier
;
6063 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
6065 if (nunits
== gather_off_nunits
)
6067 else if (nunits
== gather_off_nunits
/ 2)
6069 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6072 for (i
= 0; i
< gather_off_nunits
; ++i
)
6073 sel
[i
] = i
| nunits
;
6075 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
6077 else if (nunits
== gather_off_nunits
* 2)
6079 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6082 for (i
= 0; i
< nunits
; ++i
)
6083 sel
[i
] = i
< gather_off_nunits
6084 ? i
: i
+ nunits
- gather_off_nunits
;
6086 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6092 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
6093 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6094 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6095 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6096 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6097 scaletype
= TREE_VALUE (arglist
);
6098 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6100 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6102 ptr
= fold_convert (ptrtype
, gather_base
);
6103 if (!is_gimple_min_invariant (ptr
))
6105 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6106 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6107 gcc_assert (!new_bb
);
6110 /* Currently we support only unconditional gather loads,
6111 so mask should be all ones. */
6112 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6113 mask
= build_int_cst (masktype
, -1);
6114 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6116 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6117 mask
= build_vector_from_val (masktype
, mask
);
6118 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6120 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6124 for (j
= 0; j
< 6; ++j
)
6126 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6127 mask
= build_real (TREE_TYPE (masktype
), r
);
6128 mask
= build_vector_from_val (masktype
, mask
);
6129 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6134 scale
= build_int_cst (scaletype
, gather_scale
);
6136 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6137 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6138 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6142 for (j
= 0; j
< 6; ++j
)
6144 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6145 merge
= build_real (TREE_TYPE (rettype
), r
);
6149 merge
= build_vector_from_val (rettype
, merge
);
6150 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6152 prev_stmt_info
= NULL
;
6153 for (j
= 0; j
< ncopies
; ++j
)
6155 if (modifier
== WIDEN
&& (j
& 1))
6156 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6157 perm_mask
, stmt
, gsi
);
6160 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
6163 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
6165 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6167 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6168 == TYPE_VECTOR_SUBPARTS (idxtype
));
6169 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
6170 var
= make_ssa_name (var
);
6171 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6173 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6174 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6179 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
6181 if (!useless_type_conversion_p (vectype
, rettype
))
6183 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6184 == TYPE_VECTOR_SUBPARTS (rettype
));
6185 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
6186 op
= make_ssa_name (var
, new_stmt
);
6187 gimple_call_set_lhs (new_stmt
, op
);
6188 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6189 var
= make_ssa_name (vec_dest
);
6190 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6192 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6196 var
= make_ssa_name (vec_dest
, new_stmt
);
6197 gimple_call_set_lhs (new_stmt
, var
);
6200 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6202 if (modifier
== NARROW
)
6209 var
= permute_vec_elements (prev_res
, var
,
6210 perm_mask
, stmt
, gsi
);
6211 new_stmt
= SSA_NAME_DEF_STMT (var
);
6214 if (prev_stmt_info
== NULL
)
6215 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6217 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6218 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6222 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6224 gimple_stmt_iterator incr_gsi
;
6230 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6231 gimple_seq stmts
= NULL
;
6232 tree stride_base
, stride_step
, alias_off
;
6234 gcc_assert (!nested_in_vect_loop
);
6237 = fold_build_pointer_plus
6238 (unshare_expr (DR_BASE_ADDRESS (dr
)),
6239 size_binop (PLUS_EXPR
,
6240 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
6241 convert_to_ptrofftype (DR_INIT (dr
))));
6242 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
6244 /* For a load with loop-invariant (but other than power-of-2)
6245 stride (i.e. not a grouped access) like so:
6247 for (i = 0; i < n; i += stride)
6250 we generate a new induction variable and new accesses to
6251 form a new vector (or vectors, depending on ncopies):
6253 for (j = 0; ; j += VF*stride)
6255 tmp2 = array[j + stride];
6257 vectemp = {tmp1, tmp2, ...}
6260 ivstep
= stride_step
;
6261 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6262 build_int_cst (TREE_TYPE (ivstep
), vf
));
6264 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6266 create_iv (stride_base
, ivstep
, NULL
,
6267 loop
, &incr_gsi
, insert_after
,
6269 incr
= gsi_stmt (incr_gsi
);
6270 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
6272 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6274 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6276 prev_stmt_info
= NULL
;
6277 running_off
= offvar
;
6278 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
6279 int nloads
= nunits
;
6280 tree ltype
= TREE_TYPE (vectype
);
6283 nloads
= nunits
/ group_size
;
6284 if (group_size
< nunits
)
6285 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
6288 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
6289 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6290 gcc_assert (!slp_perm
);
6292 for (j
= 0; j
< ncopies
; j
++)
6298 vec_alloc (v
, nloads
);
6299 for (i
= 0; i
< nloads
; i
++)
6301 tree newref
, newoff
;
6303 newref
= build2 (MEM_REF
, ltype
, running_off
, alias_off
);
6305 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6308 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6309 newoff
= copy_ssa_name (running_off
);
6310 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6311 running_off
, stride_step
);
6312 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6314 running_off
= newoff
;
6317 vec_inv
= build_constructor (vectype
, v
);
6318 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6319 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6323 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
6324 build2 (MEM_REF
, ltype
,
6325 running_off
, alias_off
));
6326 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6328 tree newoff
= copy_ssa_name (running_off
);
6329 gimple incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6330 running_off
, stride_step
);
6331 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6333 running_off
= newoff
;
6337 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6339 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6341 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6342 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6349 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6351 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
6352 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
6353 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6355 /* Check if the chain of loads is already vectorized. */
6356 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6357 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6358 ??? But we can only do so if there is exactly one
6359 as we have no way to get at the rest. Leave the CSE
6361 ??? With the group load eventually participating
6362 in multiple different permutations (having multiple
6363 slp nodes which refer to the same group) the CSE
6364 is even wrong code. See PR56270. */
6367 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6370 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6371 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6373 /* VEC_NUM is the number of vect stmts to be created for this group. */
6376 grouped_load
= false;
6377 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6378 if (SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6380 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
6384 vec_num
= group_size
;
6392 group_size
= vec_num
= 1;
6396 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6397 gcc_assert (alignment_support_scheme
);
6398 /* Targets with load-lane instructions must not require explicit
6400 gcc_assert (!load_lanes_p
6401 || alignment_support_scheme
== dr_aligned
6402 || alignment_support_scheme
== dr_unaligned_supported
);
6404 /* In case the vectorization factor (VF) is bigger than the number
6405 of elements that we can fit in a vectype (nunits), we have to generate
6406 more than one vector stmt - i.e - we need to "unroll" the
6407 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6408 from one copy of the vector stmt to the next, in the field
6409 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6410 stages to find the correct vector defs to be used when vectorizing
6411 stmts that use the defs of the current stmt. The example below
6412 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6413 need to create 4 vectorized stmts):
6415 before vectorization:
6416 RELATED_STMT VEC_STMT
6420 step 1: vectorize stmt S1:
6421 We first create the vector stmt VS1_0, and, as usual, record a
6422 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6423 Next, we create the vector stmt VS1_1, and record a pointer to
6424 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6425 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6427 RELATED_STMT VEC_STMT
6428 VS1_0: vx0 = memref0 VS1_1 -
6429 VS1_1: vx1 = memref1 VS1_2 -
6430 VS1_2: vx2 = memref2 VS1_3 -
6431 VS1_3: vx3 = memref3 - -
6432 S1: x = load - VS1_0
6435 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6436 information we recorded in RELATED_STMT field is used to vectorize
6439 /* In case of interleaving (non-unit grouped access):
6446 Vectorized loads are created in the order of memory accesses
6447 starting from the access of the first stmt of the chain:
6450 VS2: vx1 = &base + vec_size*1
6451 VS3: vx3 = &base + vec_size*2
6452 VS4: vx4 = &base + vec_size*3
6454 Then permutation statements are generated:
6456 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6457 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6460 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6461 (the order of the data-refs in the output of vect_permute_load_chain
6462 corresponds to the order of scalar stmts in the interleaving chain - see
6463 the documentation of vect_permute_load_chain()).
6464 The generation of permutation stmts and recording them in
6465 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6467 In case of both multiple types and interleaving, the vector loads and
6468 permutation stmts above are created for every copy. The result vector
6469 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6470 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6472 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6473 on a target that supports unaligned accesses (dr_unaligned_supported)
6474 we generate the following code:
6478 p = p + indx * vectype_size;
6483 Otherwise, the data reference is potentially unaligned on a target that
6484 does not support unaligned accesses (dr_explicit_realign_optimized) -
6485 then generate the following code, in which the data in each iteration is
6486 obtained by two vector loads, one from the previous iteration, and one
6487 from the current iteration:
6489 msq_init = *(floor(p1))
6490 p2 = initial_addr + VS - 1;
6491 realignment_token = call target_builtin;
6494 p2 = p2 + indx * vectype_size
6496 vec_dest = realign_load (msq, lsq, realignment_token)
6501 /* If the misalignment remains the same throughout the execution of the
6502 loop, we can create the init_addr and permutation mask at the loop
6503 preheader. Otherwise, it needs to be created inside the loop.
6504 This can only occur when vectorizing memory accesses in the inner-loop
6505 nested within an outer-loop that is being vectorized. */
6507 if (nested_in_vect_loop
6508 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6509 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6511 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6512 compute_in_loop
= true;
6515 if ((alignment_support_scheme
== dr_explicit_realign_optimized
6516 || alignment_support_scheme
== dr_explicit_realign
)
6517 && !compute_in_loop
)
6519 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
6520 alignment_support_scheme
, NULL_TREE
,
6522 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6524 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
6525 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
6533 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6536 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6538 aggr_type
= vectype
;
6540 prev_stmt_info
= NULL
;
6541 for (j
= 0; j
< ncopies
; j
++)
6543 /* 1. Create the vector or array pointer update chain. */
6546 bool simd_lane_access_p
6547 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6548 if (simd_lane_access_p
6549 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6550 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6551 && integer_zerop (DR_OFFSET (first_dr
))
6552 && integer_zerop (DR_INIT (first_dr
))
6553 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6554 get_alias_set (DR_REF (first_dr
)))
6555 && (alignment_support_scheme
== dr_aligned
6556 || alignment_support_scheme
== dr_unaligned_supported
))
6558 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6559 dataref_offset
= build_int_cst (reference_alias_ptr_type
6560 (DR_REF (first_dr
)), 0);
6565 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
6566 offset
, &dummy
, gsi
, &ptr_incr
,
6567 simd_lane_access_p
, &inv_p
,
6570 else if (dataref_offset
)
6571 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
6572 TYPE_SIZE_UNIT (aggr_type
));
6574 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6575 TYPE_SIZE_UNIT (aggr_type
));
6577 if (grouped_load
|| slp_perm
)
6578 dr_chain
.create (vec_num
);
6584 vec_array
= create_vector_array (vectype
, vec_num
);
6587 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6588 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
6589 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
6590 gimple_call_set_lhs (new_stmt
, vec_array
);
6591 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6593 /* Extract each vector into an SSA_NAME. */
6594 for (i
= 0; i
< vec_num
; i
++)
6596 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
6598 dr_chain
.quick_push (new_temp
);
6601 /* Record the mapping between SSA_NAMEs and statements. */
6602 vect_record_grouped_load_vectors (stmt
, dr_chain
);
6606 for (i
= 0; i
< vec_num
; i
++)
6609 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6612 /* 2. Create the vector-load in the loop. */
6613 switch (alignment_support_scheme
)
6616 case dr_unaligned_supported
:
6618 unsigned int align
, misalign
;
6621 = build2 (MEM_REF
, vectype
, dataref_ptr
,
6624 : build_int_cst (reference_alias_ptr_type
6625 (DR_REF (first_dr
)), 0));
6626 align
= TYPE_ALIGN_UNIT (vectype
);
6627 if (alignment_support_scheme
== dr_aligned
)
6629 gcc_assert (aligned_access_p (first_dr
));
6632 else if (DR_MISALIGNMENT (first_dr
) == -1)
6634 TREE_TYPE (data_ref
)
6635 = build_aligned_type (TREE_TYPE (data_ref
),
6636 TYPE_ALIGN (elem_type
));
6637 align
= TYPE_ALIGN_UNIT (elem_type
);
6642 TREE_TYPE (data_ref
)
6643 = build_aligned_type (TREE_TYPE (data_ref
),
6644 TYPE_ALIGN (elem_type
));
6645 misalign
= DR_MISALIGNMENT (first_dr
);
6647 if (dataref_offset
== NULL_TREE
)
6648 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
6652 case dr_explicit_realign
:
6656 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
6658 if (compute_in_loop
)
6659 msq
= vect_setup_realignment (first_stmt
, gsi
,
6661 dr_explicit_realign
,
6664 ptr
= copy_ssa_name (dataref_ptr
);
6665 new_stmt
= gimple_build_assign
6666 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
6668 (TREE_TYPE (dataref_ptr
),
6669 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6670 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6672 = build2 (MEM_REF
, vectype
, ptr
,
6673 build_int_cst (reference_alias_ptr_type
6674 (DR_REF (first_dr
)), 0));
6675 vec_dest
= vect_create_destination_var (scalar_dest
,
6677 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6678 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6679 gimple_assign_set_lhs (new_stmt
, new_temp
);
6680 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
6681 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
6682 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6685 bump
= size_binop (MULT_EXPR
, vs
,
6686 TYPE_SIZE_UNIT (elem_type
));
6687 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
6688 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
6689 new_stmt
= gimple_build_assign
6690 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
6693 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6694 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
6695 gimple_assign_set_lhs (new_stmt
, ptr
);
6696 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6698 = build2 (MEM_REF
, vectype
, ptr
,
6699 build_int_cst (reference_alias_ptr_type
6700 (DR_REF (first_dr
)), 0));
6703 case dr_explicit_realign_optimized
:
6704 new_temp
= copy_ssa_name (dataref_ptr
);
6705 new_stmt
= gimple_build_assign
6706 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
6708 (TREE_TYPE (dataref_ptr
),
6709 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6710 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6712 = build2 (MEM_REF
, vectype
, new_temp
,
6713 build_int_cst (reference_alias_ptr_type
6714 (DR_REF (first_dr
)), 0));
6719 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6720 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6721 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6722 gimple_assign_set_lhs (new_stmt
, new_temp
);
6723 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6725 /* 3. Handle explicit realignment if necessary/supported.
6727 vec_dest = realign_load (msq, lsq, realignment_token) */
6728 if (alignment_support_scheme
== dr_explicit_realign_optimized
6729 || alignment_support_scheme
== dr_explicit_realign
)
6731 lsq
= gimple_assign_lhs (new_stmt
);
6732 if (!realignment_token
)
6733 realignment_token
= dataref_ptr
;
6734 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6735 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
6736 msq
, lsq
, realignment_token
);
6737 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6738 gimple_assign_set_lhs (new_stmt
, new_temp
);
6739 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6741 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6744 if (i
== vec_num
- 1 && j
== ncopies
- 1)
6745 add_phi_arg (phi
, lsq
,
6746 loop_latch_edge (containing_loop
),
6752 /* 4. Handle invariant-load. */
6753 if (inv_p
&& !bb_vinfo
)
6755 gcc_assert (!grouped_load
);
6756 /* If we have versioned for aliasing or the loop doesn't
6757 have any data dependencies that would preclude this,
6758 then we are sure this is a loop invariant load and
6759 thus we can insert it on the preheader edge. */
6760 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
6761 && !nested_in_vect_loop
6762 && hoist_defs_of_uses (stmt
, loop
))
6764 if (dump_enabled_p ())
6766 dump_printf_loc (MSG_NOTE
, vect_location
,
6767 "hoisting out of the vectorized "
6769 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6771 tree tem
= copy_ssa_name (scalar_dest
);
6772 gsi_insert_on_edge_immediate
6773 (loop_preheader_edge (loop
),
6774 gimple_build_assign (tem
,
6776 (gimple_assign_rhs1 (stmt
))));
6777 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
6781 gimple_stmt_iterator gsi2
= *gsi
;
6783 new_temp
= vect_init_vector (stmt
, scalar_dest
,
6786 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6787 set_vinfo_for_stmt (new_stmt
,
6788 new_stmt_vec_info (new_stmt
, loop_vinfo
,
6794 tree perm_mask
= perm_mask_for_reverse (vectype
);
6795 new_temp
= permute_vec_elements (new_temp
, new_temp
,
6796 perm_mask
, stmt
, gsi
);
6797 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6800 /* Collect vector loads and later create their permutation in
6801 vect_transform_grouped_load (). */
6802 if (grouped_load
|| slp_perm
)
6803 dr_chain
.quick_push (new_temp
);
6805 /* Store vector loads in the corresponding SLP_NODE. */
6806 if (slp
&& !slp_perm
)
6807 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6809 /* Bump the vector pointer to account for a gap. */
6810 if (slp
&& group_gap
!= 0)
6812 tree bump
= size_binop (MULT_EXPR
,
6813 TYPE_SIZE_UNIT (elem_type
),
6814 size_int (group_gap
));
6815 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6820 if (slp
&& !slp_perm
)
6825 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6826 slp_node_instance
, false))
6828 dr_chain
.release ();
6837 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
6838 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6843 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6845 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6846 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6849 dr_chain
.release ();
6855 /* Function vect_is_simple_cond.
6858 LOOP - the loop that is being vectorized.
6859 COND - Condition that is checked for simple use.
6862 *COMP_VECTYPE - the vector type for the comparison.
6864 Returns whether a COND can be vectorized. Checks whether
6865 condition operands are supportable using vec_is_simple_use. */
6868 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
6869 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
6873 enum vect_def_type dt
;
6874 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
6876 if (!COMPARISON_CLASS_P (cond
))
6879 lhs
= TREE_OPERAND (cond
, 0);
6880 rhs
= TREE_OPERAND (cond
, 1);
6882 if (TREE_CODE (lhs
) == SSA_NAME
)
6884 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
6885 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
6886 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
6889 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
6890 && TREE_CODE (lhs
) != FIXED_CST
)
6893 if (TREE_CODE (rhs
) == SSA_NAME
)
6895 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6896 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
6897 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
6900 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
6901 && TREE_CODE (rhs
) != FIXED_CST
)
6904 *comp_vectype
= vectype1
? vectype1
: vectype2
;
6908 /* vectorizable_condition.
6910 Check if STMT is conditional modify expression that can be vectorized.
6911 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6912 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6915 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6916 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
 6917 else clause if it is 2).
6919 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6922 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
6923 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
6926 tree scalar_dest
= NULL_TREE
;
6927 tree vec_dest
= NULL_TREE
;
6928 tree cond_expr
, then_clause
, else_clause
;
6929 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6930 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6931 tree comp_vectype
= NULL_TREE
;
6932 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
6933 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
6934 tree vec_compare
, vec_cond_expr
;
6936 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6938 enum vect_def_type dt
, dts
[4];
6939 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6941 enum tree_code code
;
6942 stmt_vec_info prev_stmt_info
= NULL
;
6944 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6945 vec
<tree
> vec_oprnds0
= vNULL
;
6946 vec
<tree
> vec_oprnds1
= vNULL
;
6947 vec
<tree
> vec_oprnds2
= vNULL
;
6948 vec
<tree
> vec_oprnds3
= vNULL
;
6951 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
6954 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6956 gcc_assert (ncopies
>= 1);
6957 if (reduc_index
&& ncopies
> 1)
6958 return false; /* FORNOW */
6960 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
6963 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6966 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6967 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
6971 /* FORNOW: not yet supported. */
6972 if (STMT_VINFO_LIVE_P (stmt_info
))
6974 if (dump_enabled_p ())
6975 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6976 "value used after loop.\n");
6980 /* Is vectorizable conditional operation? */
6981 if (!is_gimple_assign (stmt
))
6984 code
= gimple_assign_rhs_code (stmt
);
6986 if (code
!= COND_EXPR
)
6989 cond_expr
= gimple_assign_rhs1 (stmt
);
6990 then_clause
= gimple_assign_rhs2 (stmt
);
6991 else_clause
= gimple_assign_rhs3 (stmt
);
6993 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
6998 if (TREE_CODE (then_clause
) == SSA_NAME
)
7000 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
7001 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
7002 &then_def_stmt
, &def
, &dt
))
7005 else if (TREE_CODE (then_clause
) != INTEGER_CST
7006 && TREE_CODE (then_clause
) != REAL_CST
7007 && TREE_CODE (then_clause
) != FIXED_CST
)
7010 if (TREE_CODE (else_clause
) == SSA_NAME
)
7012 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
7013 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
7014 &else_def_stmt
, &def
, &dt
))
7017 else if (TREE_CODE (else_clause
) != INTEGER_CST
7018 && TREE_CODE (else_clause
) != REAL_CST
7019 && TREE_CODE (else_clause
) != FIXED_CST
)
7022 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
7023 /* The result of a vector comparison should be signed type. */
7024 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
7025 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
7026 if (vec_cmp_type
== NULL_TREE
)
7031 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7032 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
7039 vec_oprnds0
.create (1);
7040 vec_oprnds1
.create (1);
7041 vec_oprnds2
.create (1);
7042 vec_oprnds3
.create (1);
7046 scalar_dest
= gimple_assign_lhs (stmt
);
7047 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7049 /* Handle cond expr. */
7050 for (j
= 0; j
< ncopies
; j
++)
7052 gassign
*new_stmt
= NULL
;
7057 auto_vec
<tree
, 4> ops
;
7058 auto_vec
<vec
<tree
>, 4> vec_defs
;
7060 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
7061 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
7062 ops
.safe_push (then_clause
);
7063 ops
.safe_push (else_clause
);
7064 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7065 vec_oprnds3
= vec_defs
.pop ();
7066 vec_oprnds2
= vec_defs
.pop ();
7067 vec_oprnds1
= vec_defs
.pop ();
7068 vec_oprnds0
= vec_defs
.pop ();
7071 vec_defs
.release ();
7077 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
7079 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
7080 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
7083 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
7085 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
7086 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
7087 if (reduc_index
== 1)
7088 vec_then_clause
= reduc_def
;
7091 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7093 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
7094 NULL
, >emp
, &def
, &dts
[2]);
7096 if (reduc_index
== 2)
7097 vec_else_clause
= reduc_def
;
7100 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7102 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
7103 NULL
, >emp
, &def
, &dts
[3]);
7109 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
7110 vec_oprnds0
.pop ());
7111 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
7112 vec_oprnds1
.pop ());
7113 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7114 vec_oprnds2
.pop ());
7115 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7116 vec_oprnds3
.pop ());
7121 vec_oprnds0
.quick_push (vec_cond_lhs
);
7122 vec_oprnds1
.quick_push (vec_cond_rhs
);
7123 vec_oprnds2
.quick_push (vec_then_clause
);
7124 vec_oprnds3
.quick_push (vec_else_clause
);
7127 /* Arguments are ready. Create the new vector stmt. */
7128 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
7130 vec_cond_rhs
= vec_oprnds1
[i
];
7131 vec_then_clause
= vec_oprnds2
[i
];
7132 vec_else_clause
= vec_oprnds3
[i
];
7134 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
7135 vec_cond_lhs
, vec_cond_rhs
);
7136 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
7137 vec_compare
, vec_then_clause
, vec_else_clause
);
7139 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
7140 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7141 gimple_assign_set_lhs (new_stmt
, new_temp
);
7142 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7144 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7151 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7153 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7155 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7158 vec_oprnds0
.release ();
7159 vec_oprnds1
.release ();
7160 vec_oprnds2
.release ();
7161 vec_oprnds3
.release ();
7167 /* Make sure the statement is vectorizable. */
7170 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
7172 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7173 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7174 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
7176 tree scalar_type
, vectype
;
7177 gimple pattern_stmt
;
7178 gimple_seq pattern_def_seq
;
7180 if (dump_enabled_p ())
7182 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
7183 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7186 if (gimple_has_volatile_ops (stmt
))
7188 if (dump_enabled_p ())
7189 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7190 "not vectorized: stmt has volatile operands\n");
7195 /* Skip stmts that do not need to be vectorized. In loops this is expected
7197 - the COND_EXPR which is the loop exit condition
7198 - any LABEL_EXPRs in the loop
7199 - computations that are used only for array indexing or loop control.
7200 In basic blocks we only analyze statements that are a part of some SLP
7201 instance, therefore, all the statements are relevant.
7203 Pattern statement needs to be analyzed instead of the original statement
7204 if the original statement is not relevant. Otherwise, we analyze both
7205 statements. In basic blocks we are called from some SLP instance
7206 traversal, don't analyze pattern stmts instead, the pattern stmts
7207 already will be part of SLP instance. */
7209 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
7210 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
7211 && !STMT_VINFO_LIVE_P (stmt_info
))
7213 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7215 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7216 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7218 /* Analyze PATTERN_STMT instead of the original stmt. */
7219 stmt
= pattern_stmt
;
7220 stmt_info
= vinfo_for_stmt (pattern_stmt
);
7221 if (dump_enabled_p ())
7223 dump_printf_loc (MSG_NOTE
, vect_location
,
7224 "==> examining pattern statement: ");
7225 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7230 if (dump_enabled_p ())
7231 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
7236 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7239 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7240 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7242 /* Analyze PATTERN_STMT too. */
7243 if (dump_enabled_p ())
7245 dump_printf_loc (MSG_NOTE
, vect_location
,
7246 "==> examining pattern statement: ");
7247 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7250 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
7254 if (is_pattern_stmt_p (stmt_info
)
7256 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
7258 gimple_stmt_iterator si
;
7260 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
7262 gimple pattern_def_stmt
= gsi_stmt (si
);
7263 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
7264 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
7266 /* Analyze def stmt of STMT if it's a pattern stmt. */
7267 if (dump_enabled_p ())
7269 dump_printf_loc (MSG_NOTE
, vect_location
,
7270 "==> examining pattern def statement: ");
7271 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
7274 if (!vect_analyze_stmt (pattern_def_stmt
,
7275 need_to_vectorize
, node
))
7281 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
7283 case vect_internal_def
:
7286 case vect_reduction_def
:
7287 case vect_nested_cycle
:
7288 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
7289 || relevance
== vect_used_in_outer_by_reduction
7290 || relevance
== vect_unused_in_scope
));
7293 case vect_induction_def
:
7294 case vect_constant_def
:
7295 case vect_external_def
:
7296 case vect_unknown_def_type
:
7303 gcc_assert (PURE_SLP_STMT (stmt_info
));
7305 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
7306 if (dump_enabled_p ())
7308 dump_printf_loc (MSG_NOTE
, vect_location
,
7309 "get vectype for scalar type: ");
7310 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
7311 dump_printf (MSG_NOTE
, "\n");
7314 vectype
= get_vectype_for_scalar_type (scalar_type
);
7317 if (dump_enabled_p ())
7319 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7320 "not SLPed: unsupported data-type ");
7321 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
7323 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7328 if (dump_enabled_p ())
7330 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
7331 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
7332 dump_printf (MSG_NOTE
, "\n");
7335 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
7338 if (STMT_VINFO_RELEVANT_P (stmt_info
))
7340 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
7341 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
7342 || (is_gimple_call (stmt
)
7343 && gimple_call_lhs (stmt
) == NULL_TREE
));
7344 *need_to_vectorize
= true;
7349 && (STMT_VINFO_RELEVANT_P (stmt_info
)
7350 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
7351 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, NULL
)
7352 || vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
7353 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
7354 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
7355 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
7356 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
7357 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
7358 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
7359 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
7360 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
7364 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7365 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7366 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7367 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7368 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7369 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7370 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7371 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7372 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7377 if (dump_enabled_p ())
7379 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7380 "not vectorized: relevant stmt not ");
7381 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7382 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7391 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7392 need extra handling, except for vectorizable reductions. */
7393 if (STMT_VINFO_LIVE_P (stmt_info
)
7394 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7395 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
7399 if (dump_enabled_p ())
7401 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7402 "not vectorized: live stmt not ");
7403 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7404 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7414 /* Function vect_transform_stmt.
7416 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7419 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
7420 bool *grouped_store
, slp_tree slp_node
,
7421 slp_instance slp_node_instance
)
7423 bool is_store
= false;
7424 gimple vec_stmt
= NULL
;
7425 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7428 switch (STMT_VINFO_TYPE (stmt_info
))
7430 case type_demotion_vec_info_type
:
7431 case type_promotion_vec_info_type
:
7432 case type_conversion_vec_info_type
:
7433 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
7437 case induc_vec_info_type
:
7438 gcc_assert (!slp_node
);
7439 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
7443 case shift_vec_info_type
:
7444 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
7448 case op_vec_info_type
:
7449 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
7453 case assignment_vec_info_type
:
7454 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
7458 case load_vec_info_type
:
7459 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
7464 case store_vec_info_type
:
7465 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
7467 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
7469 /* In case of interleaving, the whole chain is vectorized when the
7470 last store in the chain is reached. Store stmts before the last
7471 one are skipped, and there vec_stmt_info shouldn't be freed
7473 *grouped_store
= true;
7474 if (STMT_VINFO_VEC_STMT (stmt_info
))
7481 case condition_vec_info_type
:
7482 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
7486 case call_vec_info_type
:
7487 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7488 stmt
= gsi_stmt (*gsi
);
7489 if (is_gimple_call (stmt
)
7490 && gimple_call_internal_p (stmt
)
7491 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
7495 case call_simd_clone_vec_info_type
:
7496 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7497 stmt
= gsi_stmt (*gsi
);
7500 case reduc_vec_info_type
:
7501 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
7506 if (!STMT_VINFO_LIVE_P (stmt_info
))
7508 if (dump_enabled_p ())
7509 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7510 "stmt not supported.\n");
7515 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7516 is being vectorized, but outside the immediately enclosing loop. */
7518 && STMT_VINFO_LOOP_VINFO (stmt_info
)
7519 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7520 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
7521 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
7522 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
7523 || STMT_VINFO_RELEVANT (stmt_info
) ==
7524 vect_used_in_outer_by_reduction
))
7526 struct loop
*innerloop
= LOOP_VINFO_LOOP (
7527 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
7528 imm_use_iterator imm_iter
;
7529 use_operand_p use_p
;
7533 if (dump_enabled_p ())
7534 dump_printf_loc (MSG_NOTE
, vect_location
,
7535 "Record the vdef for outer-loop vectorization.\n");
7537 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
7538 (to be used when vectorizing outer-loop stmts that use the DEF of
7540 if (gimple_code (stmt
) == GIMPLE_PHI
)
7541 scalar_dest
= PHI_RESULT (stmt
);
7543 scalar_dest
= gimple_assign_lhs (stmt
);
7545 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
7547 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
7549 exit_phi
= USE_STMT (use_p
);
7550 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
7555 /* Handle stmts whose DEF is used outside the loop-nest that is
7556 being vectorized. */
7557 if (STMT_VINFO_LIVE_P (stmt_info
)
7558 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7560 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
7565 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
7571 /* Remove a group of stores (for SLP or interleaving), free their
7575 vect_remove_stores (gimple first_stmt
)
7577 gimple next
= first_stmt
;
7579 gimple_stmt_iterator next_si
;
7583 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
7585 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
7586 if (is_pattern_stmt_p (stmt_info
))
7587 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
7588 /* Free the attached stmt_vec_info and remove the stmt. */
7589 next_si
= gsi_for_stmt (next
);
7590 unlink_stmt_vdef (next
);
7591 gsi_remove (&next_si
, true);
7592 release_defs (next
);
7593 free_stmt_vec_info (next
);
7599 /* Function new_stmt_vec_info.
7601 Create and initialize a new stmt_vec_info struct for STMT. */
7604 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
7605 bb_vec_info bb_vinfo
)
7608 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
7610 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
7611 STMT_VINFO_STMT (res
) = stmt
;
7612 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
7613 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
7614 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
7615 STMT_VINFO_LIVE_P (res
) = false;
7616 STMT_VINFO_VECTYPE (res
) = NULL
;
7617 STMT_VINFO_VEC_STMT (res
) = NULL
;
7618 STMT_VINFO_VECTORIZABLE (res
) = true;
7619 STMT_VINFO_IN_PATTERN_P (res
) = false;
7620 STMT_VINFO_RELATED_STMT (res
) = NULL
;
7621 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
7622 STMT_VINFO_DATA_REF (res
) = NULL
;
7624 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
7625 STMT_VINFO_DR_OFFSET (res
) = NULL
;
7626 STMT_VINFO_DR_INIT (res
) = NULL
;
7627 STMT_VINFO_DR_STEP (res
) = NULL
;
7628 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
7630 if (gimple_code (stmt
) == GIMPLE_PHI
7631 && is_loop_header_bb_p (gimple_bb (stmt
)))
7632 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
7634 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
7636 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
7637 STMT_SLP_TYPE (res
) = loop_vect
;
7638 GROUP_FIRST_ELEMENT (res
) = NULL
;
7639 GROUP_NEXT_ELEMENT (res
) = NULL
;
7640 GROUP_SIZE (res
) = 0;
7641 GROUP_STORE_COUNT (res
) = 0;
7642 GROUP_GAP (res
) = 0;
7643 GROUP_SAME_DR_STMT (res
) = NULL
;
7649 /* Create a hash table for stmt_vec_info. */
7652 init_stmt_vec_info_vec (void)
7654 gcc_assert (!stmt_vec_info_vec
.exists ());
7655 stmt_vec_info_vec
.create (50);
7659 /* Free hash table for stmt_vec_info. */
7662 free_stmt_vec_info_vec (void)
7666 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
7668 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
7669 gcc_assert (stmt_vec_info_vec
.exists ());
7670 stmt_vec_info_vec
.release ();
7674 /* Free stmt vectorization related info. */
7677 free_stmt_vec_info (gimple stmt
)
7679 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7684 /* Check if this statement has a related "pattern stmt"
7685 (introduced by the vectorizer during the pattern recognition
7686 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7688 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
7690 stmt_vec_info patt_info
7691 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7694 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
7695 gimple patt_stmt
= STMT_VINFO_STMT (patt_info
);
7696 gimple_set_bb (patt_stmt
, NULL
);
7697 tree lhs
= gimple_get_lhs (patt_stmt
);
7698 if (TREE_CODE (lhs
) == SSA_NAME
)
7699 release_ssa_name (lhs
);
7702 gimple_stmt_iterator si
;
7703 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
7705 gimple seq_stmt
= gsi_stmt (si
);
7706 gimple_set_bb (seq_stmt
, NULL
);
7707 lhs
= gimple_get_lhs (patt_stmt
);
7708 if (TREE_CODE (lhs
) == SSA_NAME
)
7709 release_ssa_name (lhs
);
7710 free_stmt_vec_info (seq_stmt
);
7713 free_stmt_vec_info (patt_stmt
);
7717 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
7718 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
7719 set_vinfo_for_stmt (stmt
, NULL
);
7724 /* Function get_vectype_for_scalar_type_and_size.
7726 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7730 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
7732 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
7733 machine_mode simd_mode
;
7734 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
7741 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
7742 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
7745 /* For vector types of elements whose mode precision doesn't
7746 match their types precision we use a element type of mode
7747 precision. The vectorization routines will have to make sure
7748 they support the proper result truncation/extension.
7749 We also make sure to build vector types with INTEGER_TYPE
7750 component type only. */
7751 if (INTEGRAL_TYPE_P (scalar_type
)
7752 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
7753 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
7754 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
7755 TYPE_UNSIGNED (scalar_type
));
7757 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7758 When the component mode passes the above test simply use a type
7759 corresponding to that mode. The theory is that any use that
7760 would cause problems with this will disable vectorization anyway. */
7761 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
7762 && !INTEGRAL_TYPE_P (scalar_type
))
7763 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
7765 /* We can't build a vector type of elements with alignment bigger than
7767 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
7768 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
7769 TYPE_UNSIGNED (scalar_type
));
7771 /* If we felt back to using the mode fail if there was
7772 no scalar type for it. */
7773 if (scalar_type
== NULL_TREE
)
7776 /* If no size was supplied use the mode the target prefers. Otherwise
7777 lookup a vector mode of the specified size. */
7779 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
7781 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
7782 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
7786 vectype
= build_vector_type (scalar_type
, nunits
);
7788 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7789 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
7795 unsigned int current_vector_size
;
7797 /* Function get_vectype_for_scalar_type.
7799 Returns the vector type corresponding to SCALAR_TYPE as supported
7803 get_vectype_for_scalar_type (tree scalar_type
)
7806 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
7807 current_vector_size
);
7809 && current_vector_size
== 0)
7810 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
7814 /* Function get_same_sized_vectype
7816 Returns a vector type corresponding to SCALAR_TYPE of size
7817 VECTOR_TYPE if supported by the target. */
7820 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
7822 return get_vectype_for_scalar_type_and_size
7823 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
7826 /* Function vect_is_simple_use.
7829 LOOP_VINFO - the vect info of the loop that is being vectorized.
7830 BB_VINFO - the vect info of the basic block that is being vectorized.
7831 OPERAND - operand of STMT in the loop or bb.
7832 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7834 Returns whether a stmt with OPERAND can be vectorized.
7835 For loops, supportable operands are constants, loop invariants, and operands
7836 that are defined by the current iteration of the loop. Unsupportable
7837 operands are those that are defined by a previous iteration of the loop (as
7838 is the case in reduction/induction computations).
7839 For basic blocks, supportable operands are constants and bb invariants.
7840 For now, operands defined outside the basic block are not supported. */
7843 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7844 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7845 tree
*def
, enum vect_def_type
*dt
)
7848 stmt_vec_info stmt_vinfo
;
7849 struct loop
*loop
= NULL
;
7852 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7857 if (dump_enabled_p ())
7859 dump_printf_loc (MSG_NOTE
, vect_location
,
7860 "vect_is_simple_use: operand ");
7861 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
7862 dump_printf (MSG_NOTE
, "\n");
7865 if (CONSTANT_CLASS_P (operand
))
7867 *dt
= vect_constant_def
;
7871 if (is_gimple_min_invariant (operand
))
7874 *dt
= vect_external_def
;
7878 if (TREE_CODE (operand
) == PAREN_EXPR
)
7880 if (dump_enabled_p ())
7881 dump_printf_loc (MSG_NOTE
, vect_location
, "non-associatable copy.\n");
7882 operand
= TREE_OPERAND (operand
, 0);
7885 if (TREE_CODE (operand
) != SSA_NAME
)
7887 if (dump_enabled_p ())
7888 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7893 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
7894 if (*def_stmt
== NULL
)
7896 if (dump_enabled_p ())
7897 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7902 if (dump_enabled_p ())
7904 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
7905 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
7908 /* Empty stmt is expected only in case of a function argument.
7909 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
7910 if (gimple_nop_p (*def_stmt
))
7913 *dt
= vect_external_def
;
7917 bb
= gimple_bb (*def_stmt
);
7919 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
7920 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
7921 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
7922 *dt
= vect_external_def
;
7925 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
7926 if (!loop
&& !STMT_VINFO_VECTORIZABLE (stmt_vinfo
))
7927 *dt
= vect_external_def
;
7929 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
7932 if (dump_enabled_p ())
7934 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
7937 case vect_uninitialized_def
:
7938 dump_printf (MSG_NOTE
, "uninitialized\n");
7940 case vect_constant_def
:
7941 dump_printf (MSG_NOTE
, "constant\n");
7943 case vect_external_def
:
7944 dump_printf (MSG_NOTE
, "external\n");
7946 case vect_internal_def
:
7947 dump_printf (MSG_NOTE
, "internal\n");
7949 case vect_induction_def
:
7950 dump_printf (MSG_NOTE
, "induction\n");
7952 case vect_reduction_def
:
7953 dump_printf (MSG_NOTE
, "reduction\n");
7955 case vect_double_reduction_def
:
7956 dump_printf (MSG_NOTE
, "double reduction\n");
7958 case vect_nested_cycle
:
7959 dump_printf (MSG_NOTE
, "nested cycle\n");
7961 case vect_unknown_def_type
:
7962 dump_printf (MSG_NOTE
, "unknown\n");
7967 if (*dt
== vect_unknown_def_type
7969 && *dt
== vect_double_reduction_def
7970 && gimple_code (stmt
) != GIMPLE_PHI
))
7972 if (dump_enabled_p ())
7973 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7974 "Unsupported pattern.\n");
7978 switch (gimple_code (*def_stmt
))
7981 *def
= gimple_phi_result (*def_stmt
);
7985 *def
= gimple_assign_lhs (*def_stmt
);
7989 *def
= gimple_call_lhs (*def_stmt
);
7994 if (dump_enabled_p ())
7995 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7996 "unsupported defining stmt:\n");
8003 /* Function vect_is_simple_use_1.
8005 Same as vect_is_simple_use_1 but also determines the vector operand
8006 type of OPERAND and stores it to *VECTYPE. If the definition of
8007 OPERAND is vect_uninitialized_def, vect_constant_def or
8008 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8009 is responsible to compute the best suited vector type for the
8013 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
8014 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
8015 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
8017 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
8021 /* Now get a vector type if the def is internal, otherwise supply
8022 NULL_TREE and leave it up to the caller to figure out a proper
8023 type for the use stmt. */
8024 if (*dt
== vect_internal_def
8025 || *dt
== vect_induction_def
8026 || *dt
== vect_reduction_def
8027 || *dt
== vect_double_reduction_def
8028 || *dt
== vect_nested_cycle
)
8030 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
8032 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8033 && !STMT_VINFO_RELEVANT (stmt_info
)
8034 && !STMT_VINFO_LIVE_P (stmt_info
))
8035 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8037 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8038 gcc_assert (*vectype
!= NULL_TREE
);
8040 else if (*dt
== vect_uninitialized_def
8041 || *dt
== vect_constant_def
8042 || *dt
== vect_external_def
)
8043 *vectype
= NULL_TREE
;
8051 /* Function supportable_widening_operation
8053 Check whether an operation represented by the code CODE is a
8054 widening operation that is supported by the target platform in
8055 vector form (i.e., when operating on arguments of type VECTYPE_IN
8056 producing a result of type VECTYPE_OUT).
8058 Widening operations we currently support are NOP (CONVERT), FLOAT
8059 and WIDEN_MULT. This function checks if these operations are supported
8060 by the target platform either directly (via vector tree-codes), or via
8064 - CODE1 and CODE2 are codes of vector operations to be used when
8065 vectorizing the operation, if available.
8066 - MULTI_STEP_CVT determines the number of required intermediate steps in
8067 case of multi-step conversion (like char->short->int - in that case
8068 MULTI_STEP_CVT will be 1).
8069 - INTERM_TYPES contains the intermediate type required to perform the
8070 widening operation (short in the above example). */
8073 supportable_widening_operation (enum tree_code code
, gimple stmt
,
8074 tree vectype_out
, tree vectype_in
,
8075 enum tree_code
*code1
, enum tree_code
*code2
,
8076 int *multi_step_cvt
,
8077 vec
<tree
> *interm_types
)
8079 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8080 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8081 struct loop
*vect_loop
= NULL
;
8082 machine_mode vec_mode
;
8083 enum insn_code icode1
, icode2
;
8084 optab optab1
, optab2
;
8085 tree vectype
= vectype_in
;
8086 tree wide_vectype
= vectype_out
;
8087 enum tree_code c1
, c2
;
8089 tree prev_type
, intermediate_type
;
8090 machine_mode intermediate_mode
, prev_mode
;
8091 optab optab3
, optab4
;
8093 *multi_step_cvt
= 0;
8095 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
8099 case WIDEN_MULT_EXPR
:
8100 /* The result of a vectorized widening operation usually requires
8101 two vectors (because the widened results do not fit into one vector).
8102 The generated vector results would normally be expected to be
8103 generated in the same order as in the original scalar computation,
8104 i.e. if 8 results are generated in each vector iteration, they are
8105 to be organized as follows:
8106 vect1: [res1,res2,res3,res4],
8107 vect2: [res5,res6,res7,res8].
8109 However, in the special case that the result of the widening
8110 operation is used in a reduction computation only, the order doesn't
8111 matter (because when vectorizing a reduction we change the order of
8112 the computation). Some targets can take advantage of this and
8113 generate more efficient code. For example, targets like Altivec,
8114 that support widen_mult using a sequence of {mult_even,mult_odd}
8115 generate the following vectors:
8116 vect1: [res1,res3,res5,res7],
8117 vect2: [res2,res4,res6,res8].
8119 When vectorizing outer-loops, we execute the inner-loop sequentially
8120 (each vectorized inner-loop iteration contributes to VF outer-loop
8121 iterations in parallel). We therefore don't allow to change the
8122 order of the computation in the inner-loop during outer-loop
8124 /* TODO: Another case in which order doesn't *really* matter is when we
8125 widen and then contract again, e.g. (short)((int)x * y >> 8).
8126 Normally, pack_trunc performs an even/odd permute, whereas the
8127 repack from an even/odd expansion would be an interleave, which
8128 would be significantly simpler for e.g. AVX2. */
8129 /* In any case, in order to avoid duplicating the code below, recurse
8130 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8131 are properly set up for the caller. If we fail, we'll continue with
8132 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8134 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
8135 && !nested_in_vect_loop_p (vect_loop
, stmt
)
8136 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
8137 stmt
, vectype_out
, vectype_in
,
8138 code1
, code2
, multi_step_cvt
,
8141 /* Elements in a vector with vect_used_by_reduction property cannot
8142 be reordered if the use chain with this property does not have the
8143 same operation. One such an example is s += a * b, where elements
8144 in a and b cannot be reordered. Here we check if the vector defined
8145 by STMT is only directly used in the reduction statement. */
8146 tree lhs
= gimple_assign_lhs (stmt
);
8147 use_operand_p dummy
;
8149 stmt_vec_info use_stmt_info
= NULL
;
8150 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
8151 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
8152 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
8155 c1
= VEC_WIDEN_MULT_LO_EXPR
;
8156 c2
= VEC_WIDEN_MULT_HI_EXPR
;
8159 case VEC_WIDEN_MULT_EVEN_EXPR
:
8160 /* Support the recursion induced just above. */
8161 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
8162 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
8165 case WIDEN_LSHIFT_EXPR
:
8166 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
8167 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
8171 c1
= VEC_UNPACK_LO_EXPR
;
8172 c2
= VEC_UNPACK_HI_EXPR
;
8176 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
8177 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
8180 case FIX_TRUNC_EXPR
:
8181 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8182 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8183 computing the operation. */
8190 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
8192 enum tree_code ctmp
= c1
;
8197 if (code
== FIX_TRUNC_EXPR
)
8199 /* The signedness is determined from output operand. */
8200 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8201 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
8205 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8206 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
8209 if (!optab1
|| !optab2
)
8212 vec_mode
= TYPE_MODE (vectype
);
8213 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
8214 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
8220 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8221 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
8224 /* Check if it's a multi-step conversion that can be done using intermediate
8227 prev_type
= vectype
;
8228 prev_mode
= vec_mode
;
8230 if (!CONVERT_EXPR_CODE_P (code
))
8233 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8234 intermediate steps in promotion sequence. We try
8235 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
8237 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8238 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8240 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8242 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
8243 TYPE_UNSIGNED (prev_type
));
8244 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8245 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
8247 if (!optab3
|| !optab4
8248 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
8249 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8250 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
8251 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
8252 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
8253 == CODE_FOR_nothing
)
8254 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
8255 == CODE_FOR_nothing
))
8258 interm_types
->quick_push (intermediate_type
);
8259 (*multi_step_cvt
)++;
8261 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8262 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
8265 prev_type
= intermediate_type
;
8266 prev_mode
= intermediate_mode
;
8269 interm_types
->release ();
8274 /* Function supportable_narrowing_operation
8276 Check whether an operation represented by the code CODE is a
8277 narrowing operation that is supported by the target platform in
8278 vector form (i.e., when operating on arguments of type VECTYPE_IN
8279 and producing a result of type VECTYPE_OUT).
8281 Narrowing operations we currently support are NOP (CONVERT) and
8282 FIX_TRUNC. This function checks if these operations are supported by
8283 the target platform directly via vector tree-codes.
8286 - CODE1 is the code of a vector operation to be used when
8287 vectorizing the operation, if available.
8288 - MULTI_STEP_CVT determines the number of required intermediate steps in
8289 case of multi-step conversion (like int->short->char - in that case
8290 MULTI_STEP_CVT will be 1).
8291 - INTERM_TYPES contains the intermediate type required to perform the
8292 narrowing operation (short in the above example). */
8295 supportable_narrowing_operation (enum tree_code code
,
8296 tree vectype_out
, tree vectype_in
,
8297 enum tree_code
*code1
, int *multi_step_cvt
,
8298 vec
<tree
> *interm_types
)
8300 machine_mode vec_mode
;
8301 enum insn_code icode1
;
8302 optab optab1
, interm_optab
;
8303 tree vectype
= vectype_in
;
8304 tree narrow_vectype
= vectype_out
;
8306 tree intermediate_type
;
8307 machine_mode intermediate_mode
, prev_mode
;
8311 *multi_step_cvt
= 0;
8315 c1
= VEC_PACK_TRUNC_EXPR
;
8318 case FIX_TRUNC_EXPR
:
8319 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
8323 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8324 tree code and optabs used for computing the operation. */
8331 if (code
== FIX_TRUNC_EXPR
)
8332 /* The signedness is determined from output operand. */
8333 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8335 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8340 vec_mode
= TYPE_MODE (vectype
);
8341 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
8346 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8349 /* Check if it's a multi-step conversion that can be done using intermediate
8351 prev_mode
= vec_mode
;
8352 if (code
== FIX_TRUNC_EXPR
)
8353 uns
= TYPE_UNSIGNED (vectype_out
);
8355 uns
= TYPE_UNSIGNED (vectype
);
8357 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8358 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8359 costly than signed. */
8360 if (code
== FIX_TRUNC_EXPR
&& uns
)
8362 enum insn_code icode2
;
8365 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
8367 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8368 if (interm_optab
!= unknown_optab
8369 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
8370 && insn_data
[icode1
].operand
[0].mode
8371 == insn_data
[icode2
].operand
[0].mode
)
8374 optab1
= interm_optab
;
8379 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8380 intermediate steps in promotion sequence. We try
8381 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8382 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8383 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8385 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8387 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
8389 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
8392 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
8393 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8394 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
8395 == CODE_FOR_nothing
))
8398 interm_types
->quick_push (intermediate_type
);
8399 (*multi_step_cvt
)++;
8401 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8404 prev_mode
= intermediate_mode
;
8405 optab1
= interm_optab
;
8408 interm_types
->release ();