gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
101 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
102 body_cost_vec->safe_push (si);
104 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
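/* A minimal usage sketch of record_stmt_cost (not called from anywhere;
   the names follow the callers later in this file): recording two
   unaligned vector loads for STMT_INFO in the loop body would look like

     unsigned cost = record_stmt_cost (cost_vec, 2, unaligned_load,
                                       stmt_info, DR_MISALIGNMENT (dr_info),
                                       vect_body);

   The misalignment argument only matters for the unaligned kinds, and the
   returned value is just the preliminary estimate; the final cost is
   decided when the vector of costs is handed to the target model.  */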
109 /* Return a variable of type ELEM_TYPE[NELEMS]. */
111 static tree
112 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
114 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
115 "vect_array");
118 /* ARRAY is an array of vectors created by create_vector_array.
119 Return an SSA_NAME for the vector in index N. The reference
120 is part of the vectorization of STMT_INFO and the vector is associated
121 with scalar destination SCALAR_DEST. */
123 static tree
124 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
125 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
127 tree vect_type, vect, vect_name, array_ref;
128 gimple *new_stmt;
130 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
131 vect_type = TREE_TYPE (TREE_TYPE (array));
132 vect = vect_create_destination_var (scalar_dest, vect_type);
133 array_ref = build4 (ARRAY_REF, vect_type, array,
134 build_int_cst (size_type_node, n),
135 NULL_TREE, NULL_TREE);
137 new_stmt = gimple_build_assign (vect, array_ref);
138 vect_name = make_ssa_name (vect, new_stmt);
139 gimple_assign_set_lhs (new_stmt, vect_name);
140 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
142 return vect_name;
145 /* ARRAY is an array of vectors created by create_vector_array.
146 Emit code to store SSA_NAME VECT in index N of the array.
147 The store is part of the vectorization of STMT_INFO. */
149 static void
150 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
151 tree vect, tree array, unsigned HOST_WIDE_INT n)
153 tree array_ref;
154 gimple *new_stmt;
156 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
157 build_int_cst (size_type_node, n),
158 NULL_TREE, NULL_TREE);
160 new_stmt = gimple_build_assign (array_ref, vect);
161 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
164 /* PTR is a pointer to an array of type TYPE. Return a representation
165 of *PTR. The memory reference replaces those in FIRST_DR
166 (and its group). */
168 static tree
169 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
171 tree mem_ref;
173 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
174 /* Arrays have the same alignment as their type. */
175 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
176 return mem_ref;
179 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
180 Emit the clobber before *GSI. */
182 static void
183 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
184 tree var)
186 tree clobber = build_clobber (TREE_TYPE (var));
187 gimple *new_stmt = gimple_build_assign (var, clobber);
188 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193 /* Function vect_mark_relevant.
195 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
197 static void
198 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
199 enum vect_relevant relevant, bool live_p)
201 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
202 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
204 if (dump_enabled_p ())
205 dump_printf_loc (MSG_NOTE, vect_location,
206 "mark relevant %d, live %d: %G", relevant, live_p,
207 stmt_info->stmt);
209 /* If this stmt is an original stmt in a pattern, we might need to mark its
210 related pattern stmt instead of the original stmt. However, such stmts
211 may have their own uses that are not in any pattern; in such cases the
212 stmt itself should be marked. */
213 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
215 /* This is the last stmt in a sequence that was detected as a
216 pattern that can potentially be vectorized. Don't mark the stmt
217 as relevant/live because it's not going to be vectorized.
218 Instead mark the pattern-stmt that replaces it. */
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE, vect_location,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_vec_info old_stmt_info = stmt_info;
225 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
226 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
227 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
228 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
231 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233 STMT_VINFO_RELEVANT (stmt_info) = relevant;
235 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE, vect_location,
240 "already marked relevant/live.\n");
241 return;
244 worklist->safe_push (stmt_info);
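/* For instance, if a widening-multiply pattern has replaced the scalar
   statements computing 'prod = (int) a * (int) b', asking to mark the
   original multiply as relevant redirects the marking to the
   WIDEN_MULT_EXPR pattern statement, because only the pattern statement
   will actually be vectorized.  */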
248 /* Function is_simple_and_all_uses_invariant
250 Return true if STMT_INFO is simple and all uses of it are invariant. */
252 bool
253 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
254 loop_vec_info loop_vinfo)
256 tree op;
257 ssa_op_iter iter;
259 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
260 if (!stmt)
261 return false;
263 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
265 enum vect_def_type dt = vect_uninitialized_def;
267 if (!vect_is_simple_use (op, loop_vinfo, &dt))
269 if (dump_enabled_p ())
270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
271 "use not simple.\n");
272 return false;
275 if (dt != vect_external_def && dt != vect_constant_def)
276 return false;
278 return true;
281 /* Function vect_stmt_relevant_p.
283 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
284 is "relevant for vectorization".
286 A stmt is considered "relevant for vectorization" if:
287 - it has uses outside the loop.
288 - it has vdefs (it alters memory).
289 - it is a control stmt in the loop (except for the exit condition).
291 CHECKME: what other side effects would the vectorizer allow? */
293 static bool
294 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
295 enum vect_relevant *relevant, bool *live_p)
297 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
298 ssa_op_iter op_iter;
299 imm_use_iterator imm_iter;
300 use_operand_p use_p;
301 def_operand_p def_p;
303 *relevant = vect_unused_in_scope;
304 *live_p = false;
306 /* cond stmt other than loop exit cond. */
307 if (is_ctrl_stmt (stmt_info->stmt)
308 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
309 *relevant = vect_used_in_scope;
311 /* changing memory. */
312 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
313 if (gimple_vdef (stmt_info->stmt)
314 && !gimple_clobber_p (stmt_info->stmt))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE, vect_location,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant = vect_used_in_scope;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
342 *live_p = true;
347 if (*live_p && *relevant == vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE, vect_location,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant = vect_used_only_live;
356 return (*live_p || *relevant);
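/* For illustration, in a loop such as

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + x;    <-- has a vdef, hence relevant
         last = b[i];        <-- result used after the loop, hence live
       }

   the store is marked vect_used_in_scope because it alters memory, while
   the statement feeding 'last' is only live; since it is not an invariant
   computation it ends up marked vect_used_only_live.  */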
360 /* Function exist_non_indexing_operands_for_use_p
362 USE is one of the uses attached to STMT_INFO. Check if USE is
363 used in STMT_INFO for anything other than indexing an array. */
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
368 tree operand;
370 /* USE corresponds to some operand in STMT. If there is no data
371 reference in STMT, then any operand that corresponds to USE
372 is not indexing an array. */
373 if (!STMT_VINFO_DATA_REF (stmt_info))
374 return true;
376 /* STMT has a data_ref. FORNOW this means that it is of one of
377 the following forms:
378 -1- ARRAY_REF = var
379 -2- var = ARRAY_REF
380 (This should have been verified in analyze_data_refs).
382 'var' in the second case corresponds to a def, not a use,
383 so USE cannot correspond to any operands that are not used
384 for array indexing.
386 Therefore, all we need to check is if STMT falls into the
387 first case, and whether var corresponds to USE. */
389 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
390 if (!assign || !gimple_assign_copy_p (assign))
392 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
393 if (call && gimple_call_internal_p (call))
395 internal_fn ifn = gimple_call_internal_fn (call);
396 int mask_index = internal_fn_mask_index (ifn);
397 if (mask_index >= 0
398 && use == gimple_call_arg (call, mask_index))
399 return true;
400 int stored_value_index = internal_fn_stored_value_index (ifn);
401 if (stored_value_index >= 0
402 && use == gimple_call_arg (call, stored_value_index))
403 return true;
404 if (internal_gather_scatter_fn_p (ifn)
405 && use == gimple_call_arg (call, 1))
406 return true;
408 return false;
411 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
412 return false;
413 operand = gimple_assign_rhs1 (assign);
414 if (TREE_CODE (operand) != SSA_NAME)
415 return false;
417 if (operand == use)
418 return true;
420 return false;
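/* Illustrative example: for a store of the form

     a[j_5] = x_7;

   this function returns true for the use 'x_7' (it is the stored value
   and must be vectorized) but false for 'j_5', which only appears as the
   array index, so the statement defining 'j_5' is not marked relevant on
   account of this use.  */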
424 /*
425 Function process_use.
427 Inputs:
428 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
429 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430 that defined USE. This is done by calling mark_relevant and passing it
431 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
433 be performed.
435 Outputs:
436 Generally, LIVE_P and RELEVANT are used to define the liveness and
437 relevance info of the DEF_STMT of this USE:
438 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
439 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
440 Exceptions:
441 - case 1: If USE is used only for address computations (e.g. array indexing),
442 which does not need to be directly vectorized, then the liveness/relevance
443 of the respective DEF_STMT is left unchanged.
444 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
445 we skip DEF_STMT because it has already been processed.
446 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
447 "relevant" will be modified accordingly.
449 Return true if everything is as expected. Return false otherwise. */
451 static opt_result
452 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
453 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
454 bool force)
456 stmt_vec_info dstmt_vinfo;
457 basic_block bb, def_bb;
458 enum vect_def_type dt;
460 /* case 1: we are only interested in uses that need to be vectorized. Uses
461 that are used for address computation are not considered relevant. */
462 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
463 return opt_result::success ();
465 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
466 return opt_result::failure_at (stmt_vinfo->stmt,
467 "not vectorized:"
468 " unsupported use in stmt.\n");
470 if (!dstmt_vinfo)
471 return opt_result::success ();
473 def_bb = gimple_bb (dstmt_vinfo->stmt);
475 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
476 DSTMT_VINFO must have already been processed, because this should be the
477 only way that STMT, which is a reduction-phi, was put in the worklist,
478 as there should be no other uses for DSTMT_VINFO in the loop. So we just
479 check that everything is as expected, and we are done. */
480 bb = gimple_bb (stmt_vinfo->stmt);
481 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
483 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
485 && bb->loop_father == def_bb->loop_father)
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
490 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
491 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
492 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
610 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location,
642 "init: stmt relevant? %G", stmt_info->stmt);
644 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
652 use_operand_p use_p;
653 ssa_op_iter iter;
655 stmt_vec_info stmt_vinfo = worklist.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location,
658 "worklist: examine stmt: %G", stmt_vinfo->stmt);
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
662 of STMT. */
663 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
678 case vect_reduction_def:
679 gcc_assert (relevant != vect_unused_in_scope);
680 if (relevant != vect_unused_in_scope
681 && relevant != vect_used_in_scope
682 && relevant != vect_used_by_reduction
683 && relevant != vect_used_only_live)
684 return opt_result::failure_at
685 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
686 break;
688 case vect_nested_cycle:
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_outer_by_reduction
691 && relevant != vect_used_in_outer)
692 return opt_result::failure_at
693 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
694 break;
696 case vect_double_reduction_def:
697 if (relevant != vect_unused_in_scope
698 && relevant != vect_used_by_reduction
699 && relevant != vect_used_only_live)
700 return opt_result::failure_at
701 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
702 break;
704 default:
705 break;
708 if (is_pattern_stmt_p (stmt_vinfo))
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
715 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
716 tree op = gimple_assign_rhs1 (assign);
718 i = 1;
719 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
721 opt_result res
722 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
723 loop_vinfo, relevant, &worklist, false);
724 if (!res)
725 return res;
726 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
727 loop_vinfo, relevant, &worklist, false);
728 if (!res)
729 return res;
730 i = 2;
732 for (; i < gimple_num_ops (assign); i++)
734 op = gimple_op (assign, i);
735 if (TREE_CODE (op) == SSA_NAME)
737 opt_result res
738 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
739 &worklist, false);
740 if (!res)
741 return res;
745 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
747 for (i = 0; i < gimple_call_num_args (call); i++)
749 tree arg = gimple_call_arg (call, i);
750 opt_result res
751 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
752 &worklist, false);
753 if (!res)
754 return res;
758 else
759 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
761 tree op = USE_FROM_PTR (use_p);
762 opt_result res
763 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
764 &worklist, false);
765 if (!res)
766 return res;
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
771 gather_scatter_info gs_info;
772 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
773 gcc_unreachable ();
774 opt_result res
775 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
776 &worklist, true);
777 if (!res)
778 return res;
780 } /* while worklist */
782 return opt_result::success ();
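/* Worked example of the marking above: given the reduction loop

     for (i = 0; i < n; i++)
       sum += a[i];

   the summation statement is live because its result is used by the
   loop-closed phi after the loop, so it seeds the worklist; processing
   its operands then marks the reduction phi and the load of a[i] as
   relevant.  The increment of 'i' is never marked, since its only uses
   are the array index and the exit test, both of which are handled
   separately by the vectorizer.  */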
785 /* Compute the prologue cost for invariant or constant operands. */
787 static unsigned
788 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
789 unsigned opno, enum vect_def_type dt,
790 stmt_vector_for_cost *cost_vec)
792 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
793 tree op = gimple_op (stmt, opno);
794 unsigned prologue_cost = 0;
796 /* Without looking at the actual initializer, a vector of
797 constants can be implemented as a load from the constant pool.
798 When all elements are the same we can use a splat. */
799 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
800 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
801 unsigned num_vects_to_check;
802 unsigned HOST_WIDE_INT const_nunits;
803 unsigned nelt_limit;
804 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
805 && ! multiple_p (const_nunits, group_size))
807 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
808 nelt_limit = const_nunits;
810 else
812 /* If either the vector has variable length or the vectors
813 are composed of repeated whole groups we only need to
814 cost construction once. All vectors will be the same. */
815 num_vects_to_check = 1;
816 nelt_limit = group_size;
818 tree elt = NULL_TREE;
819 unsigned nelt = 0;
820 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
822 unsigned si = j % group_size;
823 if (nelt == 0)
824 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
825 /* ??? We're just tracking whether all operands of a single
826 vector initializer are the same, ideally we'd check if
827 we emitted the same one already. */
828 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
829 opno))
830 elt = NULL_TREE;
831 nelt++;
832 if (nelt == nelt_limit)
834 /* ??? We need to pass down stmt_info for a vector type
835 even if it points to the wrong stmt. */
836 prologue_cost += record_stmt_cost
837 (cost_vec, 1,
838 dt == vect_external_def
839 ? (elt ? scalar_to_vec : vec_construct)
840 : vector_load,
841 stmt_info, 0, vect_prologue);
842 nelt = 0;
846 return prologue_cost;
849 /* Function vect_model_simple_cost.
851 Models cost for simple operations, i.e. those that only emit ncopies of a
852 single op. Right now, this does not account for multiple insns that could
853 be generated for the single vector op. We will handle that shortly. */
855 static void
856 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
857 enum vect_def_type *dt,
858 int ndts,
859 slp_tree node,
860 stmt_vector_for_cost *cost_vec)
862 int inside_cost = 0, prologue_cost = 0;
864 gcc_assert (cost_vec != NULL);
866 /* ??? Somehow we need to fix this at the callers. */
867 if (node)
868 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
870 if (node)
872 /* Scan operands and account for prologue cost of constants/externals.
873 ??? This over-estimates cost for multiple uses and should be
874 re-engineered. */
875 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
876 tree lhs = gimple_get_lhs (stmt);
877 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
879 tree op = gimple_op (stmt, i);
880 enum vect_def_type dt;
881 if (!op || op == lhs)
882 continue;
883 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
884 && (dt == vect_constant_def || dt == vect_external_def))
885 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
886 i, dt, cost_vec);
889 else
890 /* Cost the "broadcast" of a scalar operand in to a vector operand.
891 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
892 cost model. */
893 for (int i = 0; i < ndts; i++)
894 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
895 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
896 stmt_info, 0, vect_prologue);
898 /* Adjust for two-operator SLP nodes. */
899 if (node && SLP_TREE_TWO_OPERATORS (node))
901 ncopies *= 2;
902 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
903 stmt_info, 0, vect_body);
906 /* Pass the inside-of-loop statements to the target-specific cost model. */
907 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
908 stmt_info, 0, vect_body);
910 if (dump_enabled_p ())
911 dump_printf_loc (MSG_NOTE, vect_location,
912 "vect_model_simple_cost: inside_cost = %d, "
913 "prologue_cost = %d .\n", inside_cost, prologue_cost);
917 /* Model cost for type demotion and promotion operations. PWR is normally
918 zero for single-step promotions and demotions. It will be one if
919 two-step promotion/demotion is required, and so on. Each additional
920 step doubles the number of instructions required. */
922 static void
923 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
924 enum vect_def_type *dt, int pwr,
925 stmt_vector_for_cost *cost_vec)
927 int i, tmp;
928 int inside_cost = 0, prologue_cost = 0;
930 for (i = 0; i < pwr + 1; i++)
932 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
933 (i + 1) : i;
934 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
935 vec_promote_demote, stmt_info, 0,
936 vect_body);
939 /* FORNOW: Assuming maximum 2 args per stmts. */
940 for (i = 0; i < 2; i++)
941 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
942 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
943 stmt_info, 0, vect_prologue);
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE, vect_location,
947 "vect_model_promotion_demotion_cost: inside_cost = %d, "
948 "prologue_cost = %d .\n", inside_cost, prologue_cost);
951 /* Function vect_model_store_cost
953 Models cost for stores. In the case of grouped accesses, one access
954 has the overhead of the grouped access attributed to it. */
956 static void
957 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
958 enum vect_def_type dt,
959 vect_memory_access_type memory_access_type,
960 vec_load_store_type vls_type, slp_tree slp_node,
961 stmt_vector_for_cost *cost_vec)
963 unsigned int inside_cost = 0, prologue_cost = 0;
964 stmt_vec_info first_stmt_info = stmt_info;
965 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
967 /* ??? Somehow we need to fix this at the callers. */
968 if (slp_node)
969 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
971 if (vls_type == VLS_STORE_INVARIANT)
973 if (slp_node)
974 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
975 1, dt, cost_vec);
976 else
977 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
978 stmt_info, 0, vect_prologue);
981 /* Grouped stores update all elements in the group at once,
982 so we want the DR for the first statement. */
983 if (!slp_node && grouped_access_p)
984 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
986 /* True if we should include any once-per-group costs as well as
987 the cost of the statement itself. For SLP we only get called
988 once per group anyhow. */
989 bool first_stmt_p = (first_stmt_info == stmt_info);
991 /* We assume that the cost of a single store-lanes instruction is
992 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
993 access is instead being provided by a permute-and-store operation,
994 include the cost of the permutes. */
995 if (first_stmt_p
996 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
998 /* Uses high and low interleave or shuffle operations for each
999 needed permute. */
1000 int group_size = DR_GROUP_SIZE (first_stmt_info);
1001 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1002 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1003 stmt_info, 0, vect_body);
1005 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_NOTE, vect_location,
1007 "vect_model_store_cost: strided group_size = %d .\n",
1008 group_size);
1011 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1012 /* Costs of the stores. */
1013 if (memory_access_type == VMAT_ELEMENTWISE
1014 || memory_access_type == VMAT_GATHER_SCATTER)
1016 /* N scalar stores plus extracting the elements. */
1017 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1018 inside_cost += record_stmt_cost (cost_vec,
1019 ncopies * assumed_nunits,
1020 scalar_store, stmt_info, 0, vect_body);
1022 else
1023 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1025 if (memory_access_type == VMAT_ELEMENTWISE
1026 || memory_access_type == VMAT_STRIDED_SLP)
1028 /* N scalar stores plus extracting the elements. */
1029 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1030 inside_cost += record_stmt_cost (cost_vec,
1031 ncopies * assumed_nunits,
1032 vec_to_scalar, stmt_info, 0, vect_body);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_store_cost: inside_cost = %d, "
1038 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1042 /* Calculate cost of DR's memory access. */
1043 void
1044 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1045 unsigned int *inside_cost,
1046 stmt_vector_for_cost *body_cost_vec)
1048 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1049 int alignment_support_scheme
1050 = vect_supportable_dr_alignment (dr_info, false);
1052 switch (alignment_support_scheme)
1054 case dr_aligned:
1056 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1057 vector_store, stmt_info, 0,
1058 vect_body);
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_NOTE, vect_location,
1062 "vect_model_store_cost: aligned.\n");
1063 break;
1066 case dr_unaligned_supported:
1068 /* Here, we assign an additional cost for the unaligned store. */
1069 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1070 unaligned_store, stmt_info,
1071 DR_MISALIGNMENT (dr_info),
1072 vect_body);
1073 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_NOTE, vect_location,
1075 "vect_model_store_cost: unaligned supported by "
1076 "hardware.\n");
1077 break;
1080 case dr_unaligned_unsupported:
1082 *inside_cost = VECT_MAX_COST;
1084 if (dump_enabled_p ())
1085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1086 "vect_model_store_cost: unsupported access.\n");
1087 break;
1090 default:
1091 gcc_unreachable ();
1096 /* Function vect_model_load_cost
1098 Models cost for loads. In the case of grouped accesses, one access has
1099 the overhead of the grouped access attributed to it. Since unaligned
1100 accesses are supported for loads, we also account for the costs of the
1101 access scheme chosen. */
1103 static void
1104 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1105 vect_memory_access_type memory_access_type,
1106 slp_instance instance,
1107 slp_tree slp_node,
1108 stmt_vector_for_cost *cost_vec)
1110 unsigned int inside_cost = 0, prologue_cost = 0;
1111 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1113 gcc_assert (cost_vec);
1115 /* ??? Somehow we need to fix this at the callers. */
1116 if (slp_node)
1117 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1119 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1121 /* If the load is permuted then the alignment is determined by
1122 the first group element not by the first scalar stmt DR. */
1123 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1124 /* Record the cost for the permutation. */
1125 unsigned n_perms;
1126 unsigned assumed_nunits
1127 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1128 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1129 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1130 slp_vf, instance, true,
1131 &n_perms);
1132 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1133 first_stmt_info, 0, vect_body);
1134 /* And adjust the number of loads performed. This handles
1135 redundancies as well as loads that are later dead. */
1136 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1137 bitmap_clear (perm);
1138 for (unsigned i = 0;
1139 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1140 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1141 ncopies = 0;
1142 bool load_seen = false;
1143 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1145 if (i % assumed_nunits == 0)
1147 if (load_seen)
1148 ncopies++;
1149 load_seen = false;
1151 if (bitmap_bit_p (perm, i))
1152 load_seen = true;
1154 if (load_seen)
1155 ncopies++;
1156 gcc_assert (ncopies
1157 <= (DR_GROUP_SIZE (first_stmt_info)
1158 - DR_GROUP_GAP (first_stmt_info)
1159 + assumed_nunits - 1) / assumed_nunits);
1162 /* Grouped loads read all elements in the group at once,
1163 so we want the DR for the first statement. */
1164 stmt_vec_info first_stmt_info = stmt_info;
1165 if (!slp_node && grouped_access_p)
1166 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1168 /* True if we should include any once-per-group costs as well as
1169 the cost of the statement itself. For SLP we only get called
1170 once per group anyhow. */
1171 bool first_stmt_p = (first_stmt_info == stmt_info);
1173 /* We assume that the cost of a single load-lanes instruction is
1174 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1175 access is instead being provided by a load-and-permute operation,
1176 include the cost of the permutes. */
1177 if (first_stmt_p
1178 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1180 /* Uses even and odd extract operations or shuffle operations
1181 for each needed permute. */
1182 int group_size = DR_GROUP_SIZE (first_stmt_info);
1183 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1184 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1185 stmt_info, 0, vect_body);
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: strided group_size = %d .\n",
1190 group_size);
1193 /* The loads themselves. */
1194 if (memory_access_type == VMAT_ELEMENTWISE
1195 || memory_access_type == VMAT_GATHER_SCATTER)
1197 /* N scalar loads plus gathering them into a vector. */
1198 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1199 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1200 inside_cost += record_stmt_cost (cost_vec,
1201 ncopies * assumed_nunits,
1202 scalar_load, stmt_info, 0, vect_body);
1204 else
1205 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1206 &inside_cost, &prologue_cost,
1207 cost_vec, cost_vec, true);
1208 if (memory_access_type == VMAT_ELEMENTWISE
1209 || memory_access_type == VMAT_STRIDED_SLP)
1210 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1211 stmt_info, 0, vect_body);
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE, vect_location,
1215 "vect_model_load_cost: inside_cost = %d, "
1216 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1220 /* Calculate cost of DR's memory access. */
1221 void
1222 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1223 bool add_realign_cost, unsigned int *inside_cost,
1224 unsigned int *prologue_cost,
1225 stmt_vector_for_cost *prologue_cost_vec,
1226 stmt_vector_for_cost *body_cost_vec,
1227 bool record_prologue_costs)
1229 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1230 int alignment_support_scheme
1231 = vect_supportable_dr_alignment (dr_info, false);
1233 switch (alignment_support_scheme)
1235 case dr_aligned:
1237 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1238 stmt_info, 0, vect_body);
1240 if (dump_enabled_p ())
1241 dump_printf_loc (MSG_NOTE, vect_location,
1242 "vect_model_load_cost: aligned.\n");
1244 break;
1246 case dr_unaligned_supported:
1248 /* Here, we assign an additional cost for the unaligned load. */
1249 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1250 unaligned_load, stmt_info,
1251 DR_MISALIGNMENT (dr_info),
1252 vect_body);
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE, vect_location,
1256 "vect_model_load_cost: unaligned supported by "
1257 "hardware.\n");
1259 break;
1261 case dr_explicit_realign:
1263 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1264 vector_load, stmt_info, 0, vect_body);
1265 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1266 vec_perm, stmt_info, 0, vect_body);
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1270 prologue costs. */
1271 if (targetm.vectorize.builtin_mask_for_load)
1272 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1273 stmt_info, 0, vect_body);
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE, vect_location,
1277 "vect_model_load_cost: explicit realign\n");
1279 break;
1281 case dr_explicit_realign_optimized:
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "vect_model_load_cost: unaligned software "
1286 "pipelined.\n");
1288 /* An unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1295 if (add_realign_cost && record_prologue_costs)
1297 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1298 vector_stmt, stmt_info,
1299 0, vect_prologue);
1300 if (targetm.vectorize.builtin_mask_for_load)
1301 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1302 vector_stmt, stmt_info,
1303 0, vect_prologue);
1306 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1307 stmt_info, 0, vect_body);
1308 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1309 stmt_info, 0, vect_body);
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE, vect_location,
1313 "vect_model_load_cost: explicit realign optimized"
1314 "\n");
1316 break;
1319 case dr_unaligned_unsupported:
1321 *inside_cost = VECT_MAX_COST;
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1325 "vect_model_load_cost: unsupported access.\n");
1326 break;
1329 default:
1330 gcc_unreachable ();
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
1337 static void
1338 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1339 gimple_stmt_iterator *gsi)
1341 if (gsi)
1342 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1343 else
1345 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1347 if (loop_vinfo)
1349 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1350 basic_block new_bb;
1351 edge pe;
1353 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1354 loop = loop->inner;
1356 pe = loop_preheader_edge (loop);
1357 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1358 gcc_assert (!new_bb);
1360 else
1362 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1363 basic_block bb;
1364 gimple_stmt_iterator gsi_bb_start;
1366 gcc_assert (bb_vinfo);
1367 bb = BB_VINFO_BB (bb_vinfo);
1368 gsi_bb_start = gsi_after_labels (bb);
1369 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1373 if (dump_enabled_p ())
1374 dump_printf_loc (MSG_NOTE, vect_location,
1375 "created new init_stmt: %G", new_stmt);
1378 /* Function vect_init_vector.
1380 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1381 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1382 vector type a vector with all elements equal to VAL is created first.
1383 Place the initialization at BSI if it is not NULL. Otherwise, place the
1384 initialization at the loop preheader.
1385 Return the DEF of INIT_STMT.
1386 It will be used in the vectorization of STMT_INFO. */
1388 tree
1389 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1390 gimple_stmt_iterator *gsi)
1392 gimple *init_stmt;
1393 tree new_temp;
1395 /* We abuse this function to push something to an SSA name with initial 'val'. */
1396 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1398 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1399 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1401 /* A scalar boolean value should be transformed into an
1402 all-zeros or all-ones value before building a vector.  */
1403 if (VECTOR_BOOLEAN_TYPE_P (type))
1405 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1406 tree false_val = build_zero_cst (TREE_TYPE (type));
1408 if (CONSTANT_CLASS_P (val))
1409 val = integer_zerop (val) ? false_val : true_val;
1410 else
1412 new_temp = make_ssa_name (TREE_TYPE (type));
1413 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1414 val, true_val, false_val);
1415 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1416 val = new_temp;
1419 else if (CONSTANT_CLASS_P (val))
1420 val = fold_convert (TREE_TYPE (type), val);
1421 else
1423 new_temp = make_ssa_name (TREE_TYPE (type));
1424 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1425 init_stmt = gimple_build_assign (new_temp,
1426 fold_build1 (VIEW_CONVERT_EXPR,
1427 TREE_TYPE (type),
1428 val));
1429 else
1430 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1431 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1432 val = new_temp;
1435 val = build_vector_from_val (type, val);
1438 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1439 init_stmt = gimple_build_assign (new_temp, val);
1440 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1441 return new_temp;
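/* As a concrete illustration (SSA names invented): vectorizing an
   invariant operand 'x_3' with a four-element vector type results in a
   preheader statement along the lines of

     cst__1 = {x_3, x_3, x_3, x_3};

   whereas a scalar boolean invariant is first widened to -1/0 with a
   COND_EXPR so that every bit of each element in the resulting mask
   vector is set or clear.  */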
1444 /* Function vect_get_vec_def_for_operand_1.
1446 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1447 with type DT that will be used in the vectorized stmt. */
1449 tree
1450 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1451 enum vect_def_type dt)
1453 tree vec_oprnd;
1454 stmt_vec_info vec_stmt_info;
1456 switch (dt)
1458 /* operand is a constant or a loop invariant. */
1459 case vect_constant_def:
1460 case vect_external_def:
1461 /* Code should use vect_get_vec_def_for_operand. */
1462 gcc_unreachable ();
1464 /* Operand is defined by a loop header phi. In case of nested
1465 cycles we also may have uses of the backedge def. */
1466 case vect_reduction_def:
1467 case vect_double_reduction_def:
1468 case vect_nested_cycle:
1469 case vect_induction_def:
1470 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1471 || dt == vect_nested_cycle);
1472 /* Fallthru. */
1474 /* operand is defined inside the loop. */
1475 case vect_internal_def:
1477 /* Get the def from the vectorized stmt. */
1478 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1479 /* Get vectorized pattern statement. */
1480 if (!vec_stmt_info
1481 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1482 && !STMT_VINFO_RELEVANT (def_stmt_info))
1483 vec_stmt_info = (STMT_VINFO_VEC_STMT
1484 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1485 gcc_assert (vec_stmt_info);
1486 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1487 vec_oprnd = PHI_RESULT (phi);
1488 else
1489 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1490 return vec_oprnd;
1493 default:
1494 gcc_unreachable ();
1499 /* Function vect_get_vec_def_for_operand.
1501 OP is an operand in STMT_VINFO. This function returns a (vector) def
1502 that will be used in the vectorized stmt for STMT_VINFO.
1504 In the case that OP is an SSA_NAME which is defined in the loop, then
1505 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1507 In case OP is an invariant or constant, a new stmt that creates a vector def
1508 needs to be introduced. VECTYPE may be used to specify a required type for
1509 vector invariant. */
1511 tree
1512 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1514 gimple *def_stmt;
1515 enum vect_def_type dt;
1516 bool is_simple_use;
1517 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location,
1521 "vect_get_vec_def_for_operand: %T\n", op);
1523 stmt_vec_info def_stmt_info;
1524 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1525 &def_stmt_info, &def_stmt);
1526 gcc_assert (is_simple_use);
1527 if (def_stmt && dump_enabled_p ())
1528 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1530 if (dt == vect_constant_def || dt == vect_external_def)
1532 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1533 tree vector_type;
1535 if (vectype)
1536 vector_type = vectype;
1537 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1538 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1539 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1540 else
1541 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1543 gcc_assert (vector_type);
1544 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1546 else
1547 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1551 /* Function vect_get_vec_def_for_stmt_copy
1553 Return a vector-def for an operand. This function is used when the
1554 vectorized stmt to be created (by the caller to this function) is a "copy"
1555 created in case the vectorized result cannot fit in one vector, and several
1556 copies of the vector-stmt are required. In this case the vector-def is
1557 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1558 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1560 Context:
1561 In case the vectorization factor (VF) is bigger than the number
1562 of elements that can fit in a vectype (nunits), we have to generate
1563 more than one vector stmt to vectorize the scalar stmt. This situation
1564 arises when there are multiple data-types operated upon in the loop; the
1565 smallest data-type determines the VF, and as a result, when vectorizing
1566 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1567 vector stmt (each computing a vector of 'nunits' results, and together
1568 computing 'VF' results in each iteration). This function is called when
1569 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1570 which VF=16 and nunits=4, so the number of copies required is 4):
1572 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1574 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1575 VS1.1: vx.1 = memref1 VS1.2
1576 VS1.2: vx.2 = memref2 VS1.3
1577 VS1.3: vx.3 = memref3
1579 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1580 VSnew.1: vz1 = vx.1 + ... VSnew.2
1581 VSnew.2: vz2 = vx.2 + ... VSnew.3
1582 VSnew.3: vz3 = vx.3 + ...
1584 The vectorization of S1 is explained in vectorizable_load.
1585 The vectorization of S2:
1586 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1587 the function 'vect_get_vec_def_for_operand' is called to
1588 get the relevant vector-def for each operand of S2. For operand x it
1589 returns the vector-def 'vx.0'.
1591 To create the remaining copies of the vector-stmt (VSnew.j), this
1592 function is called to get the relevant vector-def for each operand. It is
1593 obtained from the respective VS1.j stmt, which is recorded in the
1594 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1596 For example, to obtain the vector-def 'vx.1' in order to create the
1597 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1598 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1599 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1600 and return its def ('vx.1').
1601 Overall, to create the above sequence this function will be called 3 times:
1602 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1603 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1604 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1606 tree
1607 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1609 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1610 if (!def_stmt_info)
1611 /* Do nothing; can reuse same def. */
1612 return vec_oprnd;
1614 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1615 gcc_assert (def_stmt_info);
1616 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1617 vec_oprnd = PHI_RESULT (phi);
1618 else
1619 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1620 return vec_oprnd;
1624 /* Get vectorized definitions for the operands to create a copy of an original
1625 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1627 void
1628 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1629 vec<tree> *vec_oprnds0,
1630 vec<tree> *vec_oprnds1)
1632 tree vec_oprnd = vec_oprnds0->pop ();
1634 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1635 vec_oprnds0->quick_push (vec_oprnd);
1637 if (vec_oprnds1 && vec_oprnds1->length ())
1639 vec_oprnd = vec_oprnds1->pop ();
1640 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1641 vec_oprnds1->quick_push (vec_oprnd);
1646 /* Get vectorized definitions for OP0 and OP1. */
1648 void
1649 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1650 vec<tree> *vec_oprnds0,
1651 vec<tree> *vec_oprnds1,
1652 slp_tree slp_node)
1654 if (slp_node)
1656 int nops = (op1 == NULL_TREE) ? 1 : 2;
1657 auto_vec<tree> ops (nops);
1658 auto_vec<vec<tree> > vec_defs (nops);
1660 ops.quick_push (op0);
1661 if (op1)
1662 ops.quick_push (op1);
1664 vect_get_slp_defs (ops, slp_node, &vec_defs);
1666 *vec_oprnds0 = vec_defs[0];
1667 if (op1)
1668 *vec_oprnds1 = vec_defs[1];
1670 else
1672 tree vec_oprnd;
1674 vec_oprnds0->create (1);
1675 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1676 vec_oprnds0->quick_push (vec_oprnd);
1678 if (op1)
1680 vec_oprnds1->create (1);
1681 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1682 vec_oprnds1->quick_push (vec_oprnd);
1687 /* Helper function called by vect_finish_replace_stmt and
1688 vect_finish_stmt_generation. Set the location of the new
1689 statement and create and return a stmt_vec_info for it. */
1691 static stmt_vec_info
1692 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1694 vec_info *vinfo = stmt_info->vinfo;
1696 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1698 if (dump_enabled_p ())
1699 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1701 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1703 /* While EH edges will generally prevent vectorization, stmt might
1704 e.g. be in a must-not-throw region. Ensure newly created stmts
1705 that could throw are part of the same region. */
1706 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1707 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1708 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1710 return vec_stmt_info;
1713 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1714 which sets the same scalar result as STMT_INFO did. Create and return a
1715 stmt_vec_info for VEC_STMT. */
1717 stmt_vec_info
1718 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1720 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1722 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1723 gsi_replace (&gsi, vec_stmt, true);
1725 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1728 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1729 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1731 stmt_vec_info
1732 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1733 gimple_stmt_iterator *gsi)
1735 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1737 if (!gsi_end_p (*gsi)
1738 && gimple_has_mem_ops (vec_stmt))
1740 gimple *at_stmt = gsi_stmt (*gsi);
1741 tree vuse = gimple_vuse (at_stmt);
1742 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1744 tree vdef = gimple_vdef (at_stmt);
1745 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1746 /* If we have an SSA vuse and insert a store, update virtual
1747 SSA form to avoid triggering the renamer. Do so only
1748 if we can easily see all uses - which is what almost always
1749 happens with the way vectorized stmts are inserted. */
1750 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1751 && ((is_gimple_assign (vec_stmt)
1752 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1753 || (is_gimple_call (vec_stmt)
1754 && !(gimple_call_flags (vec_stmt)
1755 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1757 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1758 gimple_set_vdef (vec_stmt, new_vdef);
1759 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1763 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1764 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1767 /* We want to vectorize a call to combined function CFN with function
1768 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1769 as the types of all inputs. Check whether this is possible using
1770 an internal function, returning its code if so or IFN_LAST if not. */
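/* For example, a call to the sqrt built-in is associated with the internal
   function IFN_SQRT; if the target provides the corresponding vector optab
   for the chosen vector type, direct_internal_fn_supported_p accepts it and
   the call can be emitted as an internal function.  Which functions qualify
   in this way depends on the target.  */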
1772 static internal_fn
1773 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1774 tree vectype_out, tree vectype_in)
1776 internal_fn ifn;
1777 if (internal_fn_p (cfn))
1778 ifn = as_internal_fn (cfn);
1779 else
1780 ifn = associated_internal_fn (fndecl);
1781 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1783 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1784 if (info.vectorizable)
1786 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1787 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1788 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1789 OPTIMIZE_FOR_SPEED))
1790 return ifn;
1793 return IFN_LAST;
1797 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1798 gimple_stmt_iterator *);
1800 /* Check whether a load or store statement in the loop described by
1801 LOOP_VINFO is possible in a fully-masked loop. This is testing
1802 whether the vectorizer pass has the appropriate support, as well as
1803 whether the target does.
1805 VLS_TYPE says whether the statement is a load or store and VECTYPE
1806 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1807 says how the load or store is going to be implemented and GROUP_SIZE
1808 is the number of load or store statements in the containing group.
1809 If the access is a gather load or scatter store, GS_INFO describes
1810 its arguments.
1812 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1813 supported, otherwise record the required mask types. */
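/* As an illustration, assuming a contiguous, ungrouped access with a
   4-element vector type and a vectorization factor of 8, the code below
   records 2 masks per loop iteration (GROUP_SIZE * VF / NUNITS); for
   load/store-lanes and gather/scatter accesses it records one mask per
   vector copy instead.  */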
1815 static void
1816 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1817 vec_load_store_type vls_type, int group_size,
1818 vect_memory_access_type memory_access_type,
1819 gather_scatter_info *gs_info)
1821 /* Invariant loads need no special support. */
1822 if (memory_access_type == VMAT_INVARIANT)
1823 return;
1825 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1826 machine_mode vecmode = TYPE_MODE (vectype);
1827 bool is_load = (vls_type == VLS_LOAD);
1828 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1830 if (is_load
1831 ? !vect_load_lanes_supported (vectype, group_size, true)
1832 : !vect_store_lanes_supported (vectype, group_size, true))
1834 if (dump_enabled_p ())
1835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1836 "can't use a fully-masked loop because the"
1837 " target doesn't have an appropriate masked"
1838 " load/store-lanes instruction.\n");
1839 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1840 return;
1842 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1843 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1844 return;
1847 if (memory_access_type == VMAT_GATHER_SCATTER)
1849 internal_fn ifn = (is_load
1850 ? IFN_MASK_GATHER_LOAD
1851 : IFN_MASK_SCATTER_STORE);
1852 tree offset_type = TREE_TYPE (gs_info->offset);
1853 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1854 gs_info->memory_type,
1855 TYPE_SIGN (offset_type),
1856 gs_info->scale))
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1860 "can't use a fully-masked loop because the"
1861 " target doesn't have an appropriate masked"
1862 " gather load or scatter store instruction.\n");
1863 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1864 return;
1866 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1867 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1868 return;
1871 if (memory_access_type != VMAT_CONTIGUOUS
1872 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1874 /* Element X of the data must come from iteration i * VF + X of the
1875 scalar loop. We need more work to support other mappings. */
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1878 "can't use a fully-masked loop because an access"
1879 " isn't contiguous.\n");
1880 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1881 return;
1884 machine_mode mask_mode;
1885 if (!(targetm.vectorize.get_mask_mode
1886 (GET_MODE_NUNITS (vecmode),
1887 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1888 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1890 if (dump_enabled_p ())
1891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1892 "can't use a fully-masked loop because the target"
1893 " doesn't have the appropriate masked load or"
1894 " store.\n");
1895 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1896 return;
1898 /* We might load more scalars than we need for permuting SLP loads.
1899 We checked in get_group_load_store_type that the extra elements
1900 don't leak into a new vector. */
1901 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1902 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1903 unsigned int nvectors;
1904 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1905 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1906 else
1907 gcc_unreachable ();
1910 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1911 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1912 that needs to be applied to all loads and stores in a vectorized loop.
1913 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1915 MASK_TYPE is the type of both masks. If new statements are needed,
1916 insert them before GSI. */
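/* The combination is a single statement of the form

     vec_mask_and_N = VEC_MASK & LOOP_MASK;

   inserted before GSI when LOOP_MASK is nonnull.  */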
1918 static tree
1919 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1920 gimple_stmt_iterator *gsi)
1922 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1923 if (!loop_mask)
1924 return vec_mask;
1926 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1927 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1928 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1929 vec_mask, loop_mask);
1930 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1931 return and_res;
1934 /* Determine whether we can use a gather load or scatter store to vectorize
1935 strided load or store STMT_INFO by truncating the current offset to a
1936 smaller width. We need to be able to construct an offset vector:
1938 { 0, X, X*2, X*3, ... }
1940 without loss of precision, where X is STMT_INFO's DR_STEP.
1942 Return true if this is possible, describing the gather load or scatter
1943 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
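/* As a worked example, assuming 32-bit elements and DR_STEP == 12: trying
   SCALE == 4 gives a per-element step of 3, so the offset vector would be
   { 0, 3, 6, 9, ... } and the accessed addresses BASE + { 0, 12, 24, 36, ... }.
   The loop below checks that the largest such offset still fits in the
   element width without overflow.  */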
1945 static bool
1946 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1947 loop_vec_info loop_vinfo, bool masked_p,
1948 gather_scatter_info *gs_info)
1950 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1951 data_reference *dr = dr_info->dr;
1952 tree step = DR_STEP (dr);
1953 if (TREE_CODE (step) != INTEGER_CST)
1955 /* ??? Perhaps we could use range information here? */
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE, vect_location,
1958 "cannot truncate variable step.\n");
1959 return false;
1962 /* Get the number of bits in an element. */
1963 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1964 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1965 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1967 /* Set COUNT to the upper limit on the number of elements - 1.
1968 Start with the maximum vectorization factor. */
1969 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1971 /* Try lowering COUNT to the number of scalar latch iterations. */
1972 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1973 widest_int max_iters;
1974 if (max_loop_iterations (loop, &max_iters)
1975 && max_iters < count)
1976 count = max_iters.to_shwi ();
1978 /* Try scales of 1 and the element size. */
1979 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1980 wi::overflow_type overflow = wi::OVF_NONE;
1981 for (int i = 0; i < 2; ++i)
1983 int scale = scales[i];
1984 widest_int factor;
1985 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1986 continue;
 1988 /* See whether we can calculate COUNT * STEP / SCALE (the largest
 1989 offset needed) in ELEMENT_BITS bits. */
1990 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1991 if (overflow)
1992 continue;
1993 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1994 if (wi::min_precision (range, sign) > element_bits)
1996 overflow = wi::OVF_UNKNOWN;
1997 continue;
2000 /* See whether the target supports the operation. */
2001 tree memory_type = TREE_TYPE (DR_REF (dr));
2002 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2003 memory_type, element_bits, sign, scale,
2004 &gs_info->ifn, &gs_info->element_type))
2005 continue;
2007 tree offset_type = build_nonstandard_integer_type (element_bits,
2008 sign == UNSIGNED);
2010 gs_info->decl = NULL_TREE;
2011 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2012 but we don't need to store that here. */
2013 gs_info->base = NULL_TREE;
2014 gs_info->offset = fold_convert (offset_type, step);
2015 gs_info->offset_dt = vect_constant_def;
2016 gs_info->offset_vectype = NULL_TREE;
2017 gs_info->scale = scale;
2018 gs_info->memory_type = memory_type;
2019 return true;
2022 if (overflow && dump_enabled_p ())
2023 dump_printf_loc (MSG_NOTE, vect_location,
2024 "truncating gather/scatter offset to %d bits"
2025 " might change its value.\n", element_bits);
2027 return false;
2030 /* Return true if we can use gather/scatter internal functions to
2031 vectorize STMT_INFO, which is a grouped or strided load or store.
2032 MASKED_P is true if load or store is conditional. When returning
2033 true, fill in GS_INFO with the information required to perform the
2034 operation. */
2036 static bool
2037 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2038 loop_vec_info loop_vinfo, bool masked_p,
2039 gather_scatter_info *gs_info)
2041 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2042 || gs_info->decl)
2043 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2044 masked_p, gs_info);
2046 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2047 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2048 tree offset_type = TREE_TYPE (gs_info->offset);
2049 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2051 /* Enforced by vect_check_gather_scatter. */
2052 gcc_assert (element_bits >= offset_bits);
2054 /* If the elements are wider than the offset, convert the offset to the
2055 same width, without changing its sign. */
2056 if (element_bits > offset_bits)
2058 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2059 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2060 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2063 if (dump_enabled_p ())
2064 dump_printf_loc (MSG_NOTE, vect_location,
2065 "using gather/scatter for strided/grouped access,"
2066 " scale = %d\n", gs_info->scale);
2068 return true;
2071 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2072 elements with a known constant step. Return -1 if that step
2073 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2075 static int
2076 compare_step_with_zero (stmt_vec_info stmt_info)
2078 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2079 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2080 size_zero_node);
2083 /* If the target supports a permute mask that reverses the elements in
2084 a vector of type VECTYPE, return that mask, otherwise return null. */
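/* For a 4-element vector the required selector is { 3, 2, 1, 0 }; the
   builder below encodes it as a single stepped pattern so that the same
   description also works for variable-length vectors.  */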
2086 static tree
2087 perm_mask_for_reverse (tree vectype)
2089 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2091 /* The encoding has a single stepped pattern. */
2092 vec_perm_builder sel (nunits, 1, 3);
2093 for (int i = 0; i < 3; ++i)
2094 sel.quick_push (nunits - 1 - i);
2096 vec_perm_indices indices (sel, 1, nunits);
2097 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2098 return NULL_TREE;
2099 return vect_gen_perm_mask_checked (vectype, indices);
2102 /* STMT_INFO is either a masked or unconditional store. Return the value
2103 being stored. */
2105 tree
2106 vect_get_store_rhs (stmt_vec_info stmt_info)
2108 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2110 gcc_assert (gimple_assign_single_p (assign));
2111 return gimple_assign_rhs1 (assign);
2113 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2115 internal_fn ifn = gimple_call_internal_fn (call);
2116 int index = internal_fn_stored_value_index (ifn);
2117 gcc_assert (index >= 0);
2118 return gimple_call_arg (call, index);
2120 gcc_unreachable ();
2123 /* A subroutine of get_load_store_type, with a subset of the same
2124 arguments. Handle the case where STMT_INFO is part of a grouped load
2125 or store.
2127 For stores, the statements in the group are all consecutive
2128 and there is no gap at the end. For loads, the statements in the
2129 group might not be consecutive; there can be gaps between statements
2130 as well as at the end. */
2132 static bool
2133 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2134 bool masked_p, vec_load_store_type vls_type,
2135 vect_memory_access_type *memory_access_type,
2136 gather_scatter_info *gs_info)
2138 vec_info *vinfo = stmt_info->vinfo;
2139 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2140 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2141 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2142 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2143 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2144 bool single_element_p = (stmt_info == first_stmt_info
2145 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2146 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2147 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2149 /* True if the vectorized statements would access beyond the last
2150 statement in the group. */
2151 bool overrun_p = false;
2153 /* True if we can cope with such overrun by peeling for gaps, so that
2154 there is at least one final scalar iteration after the vector loop. */
2155 bool can_overrun_p = (!masked_p
2156 && vls_type == VLS_LOAD
2157 && loop_vinfo
2158 && !loop->inner);
2160 /* There can only be a gap at the end of the group if the stride is
2161 known at compile time. */
2162 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2164 /* Stores can't yet have gaps. */
2165 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2167 if (slp)
2169 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2171 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2172 separated by the stride, until we have a complete vector.
2173 Fall back to scalar accesses if that isn't possible. */
2174 if (multiple_p (nunits, group_size))
2175 *memory_access_type = VMAT_STRIDED_SLP;
2176 else
2177 *memory_access_type = VMAT_ELEMENTWISE;
2179 else
2181 overrun_p = loop_vinfo && gap != 0;
2182 if (overrun_p && vls_type != VLS_LOAD)
2184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2185 "Grouped store with gaps requires"
2186 " non-consecutive accesses\n");
2187 return false;
2189 /* An overrun is fine if the trailing elements are smaller
2190 than the alignment boundary B. Every vector access will
2191 be a multiple of B and so we are guaranteed to access a
2192 non-gap element in the same B-sized block. */
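/* For example, with 4-byte elements known to be 16-byte aligned, B is
   16 / 4 == 4 elements, so a trailing gap of up to 3 elements stays inside
   a 16-byte block that also contains a real element, and the overrun
   cannot fault.  */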
2193 if (overrun_p
2194 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2195 / vect_get_scalar_dr_size (first_dr_info)))
2196 overrun_p = false;
2197 if (overrun_p && !can_overrun_p)
2199 if (dump_enabled_p ())
2200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2201 "Peeling for outer loop is not supported\n");
2202 return false;
2204 *memory_access_type = VMAT_CONTIGUOUS;
2207 else
2209 /* We can always handle this case using elementwise accesses,
2210 but see if something more efficient is available. */
2211 *memory_access_type = VMAT_ELEMENTWISE;
2213 /* If there is a gap at the end of the group then these optimizations
2214 would access excess elements in the last iteration. */
2215 bool would_overrun_p = (gap != 0);
2216 /* An overrun is fine if the trailing elements are smaller than the
2217 alignment boundary B. Every vector access will be a multiple of B
2218 and so we are guaranteed to access a non-gap element in the
2219 same B-sized block. */
2220 if (would_overrun_p
2221 && !masked_p
2222 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2223 / vect_get_scalar_dr_size (first_dr_info)))
2224 would_overrun_p = false;
2226 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2227 && (can_overrun_p || !would_overrun_p)
2228 && compare_step_with_zero (stmt_info) > 0)
2230 /* First cope with the degenerate case of a single-element
2231 vector. */
2232 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2233 *memory_access_type = VMAT_CONTIGUOUS;
2235 /* Otherwise try using LOAD/STORE_LANES. */
2236 if (*memory_access_type == VMAT_ELEMENTWISE
2237 && (vls_type == VLS_LOAD
2238 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2239 : vect_store_lanes_supported (vectype, group_size,
2240 masked_p)))
2242 *memory_access_type = VMAT_LOAD_STORE_LANES;
2243 overrun_p = would_overrun_p;
2246 /* If that fails, try using permuting loads. */
2247 if (*memory_access_type == VMAT_ELEMENTWISE
2248 && (vls_type == VLS_LOAD
2249 ? vect_grouped_load_supported (vectype, single_element_p,
2250 group_size)
2251 : vect_grouped_store_supported (vectype, group_size)))
2253 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2254 overrun_p = would_overrun_p;
 2258 /* As a last resort, try using a gather load or scatter store.
2260 ??? Although the code can handle all group sizes correctly,
2261 it probably isn't a win to use separate strided accesses based
2262 on nearby locations. Or, even if it's a win over scalar code,
2263 it might not be a win over vectorizing at a lower VF, if that
2264 allows us to use contiguous accesses. */
2265 if (*memory_access_type == VMAT_ELEMENTWISE
2266 && single_element_p
2267 && loop_vinfo
2268 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2269 masked_p, gs_info))
2270 *memory_access_type = VMAT_GATHER_SCATTER;
2273 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2275 /* STMT is the leader of the group. Check the operands of all the
2276 stmts of the group. */
2277 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2278 while (next_stmt_info)
2280 tree op = vect_get_store_rhs (next_stmt_info);
2281 enum vect_def_type dt;
2282 if (!vect_is_simple_use (op, vinfo, &dt))
2284 if (dump_enabled_p ())
2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286 "use not simple.\n");
2287 return false;
2289 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2293 if (overrun_p)
2295 gcc_assert (can_overrun_p);
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2298 "Data access with gaps requires scalar "
2299 "epilogue loop\n");
2300 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2303 return true;
2306 /* A subroutine of get_load_store_type, with a subset of the same
2307 arguments. Handle the case where STMT_INFO is a load or store that
2308 accesses consecutive elements with a negative step. */
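/* For example, a load with DR_STEP == -4 on 4-byte elements walks the
   array backwards; when the target can reverse a vector (see
   perm_mask_for_reverse) the block can still be accessed contiguously and
   then reversed, giving VMAT_CONTIGUOUS_REVERSE, otherwise we fall back
   to VMAT_ELEMENTWISE.  */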
2310 static vect_memory_access_type
2311 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2312 vec_load_store_type vls_type,
2313 unsigned int ncopies)
2315 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2316 dr_alignment_support alignment_support_scheme;
2318 if (ncopies > 1)
2320 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2322 "multiple types with negative step.\n");
2323 return VMAT_ELEMENTWISE;
2326 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2327 if (alignment_support_scheme != dr_aligned
2328 && alignment_support_scheme != dr_unaligned_supported)
2330 if (dump_enabled_p ())
2331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2332 "negative step but alignment required.\n");
2333 return VMAT_ELEMENTWISE;
2336 if (vls_type == VLS_STORE_INVARIANT)
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_NOTE, vect_location,
2340 "negative step with invariant source;"
2341 " no permute needed.\n");
2342 return VMAT_CONTIGUOUS_DOWN;
2345 if (!perm_mask_for_reverse (vectype))
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2349 "negative step and reversing not supported.\n");
2350 return VMAT_ELEMENTWISE;
2353 return VMAT_CONTIGUOUS_REVERSE;
2356 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2357 if there is a memory access type that the vectorized form can use,
2358 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2359 or scatters, fill in GS_INFO accordingly.
2361 SLP says whether we're performing SLP rather than loop vectorization.
2362 MASKED_P is true if the statement is conditional on a vectorized mask.
2363 VECTYPE is the vector type that the vectorized statements will use.
2364 NCOPIES is the number of vector statements that will be needed. */
2366 static bool
2367 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2368 bool masked_p, vec_load_store_type vls_type,
2369 unsigned int ncopies,
2370 vect_memory_access_type *memory_access_type,
2371 gather_scatter_info *gs_info)
2373 vec_info *vinfo = stmt_info->vinfo;
2374 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2375 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2376 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2378 *memory_access_type = VMAT_GATHER_SCATTER;
2379 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2380 gcc_unreachable ();
2381 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2382 &gs_info->offset_dt,
2383 &gs_info->offset_vectype))
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "%s index use not simple.\n",
2388 vls_type == VLS_LOAD ? "gather" : "scatter");
2389 return false;
2392 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2394 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2395 vls_type, memory_access_type, gs_info))
2396 return false;
2398 else if (STMT_VINFO_STRIDED_P (stmt_info))
2400 gcc_assert (!slp);
2401 if (loop_vinfo
2402 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2403 masked_p, gs_info))
2404 *memory_access_type = VMAT_GATHER_SCATTER;
2405 else
2406 *memory_access_type = VMAT_ELEMENTWISE;
2408 else
2410 int cmp = compare_step_with_zero (stmt_info);
2411 if (cmp < 0)
2412 *memory_access_type = get_negative_load_store_type
2413 (stmt_info, vectype, vls_type, ncopies);
2414 else if (cmp == 0)
2416 gcc_assert (vls_type == VLS_LOAD);
2417 *memory_access_type = VMAT_INVARIANT;
2419 else
2420 *memory_access_type = VMAT_CONTIGUOUS;
2423 if ((*memory_access_type == VMAT_ELEMENTWISE
2424 || *memory_access_type == VMAT_STRIDED_SLP)
2425 && !nunits.is_constant ())
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "Not using elementwise accesses due to variable "
2430 "vectorization factor.\n");
2431 return false;
2434 /* FIXME: At the moment the cost model seems to underestimate the
2435 cost of using elementwise accesses. This check preserves the
2436 traditional behavior until that can be fixed. */
2437 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2438 if (!first_stmt_info)
2439 first_stmt_info = stmt_info;
2440 if (*memory_access_type == VMAT_ELEMENTWISE
2441 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2442 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2443 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2444 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2446 if (dump_enabled_p ())
2447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2448 "not falling back to elementwise accesses\n");
2449 return false;
2451 return true;
2454 /* Return true if boolean argument MASK is suitable for vectorizing
2455 conditional load or store STMT_INFO. When returning true, store the type
2456 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2457 in *MASK_VECTYPE_OUT. */
2459 static bool
2460 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2461 vect_def_type *mask_dt_out,
2462 tree *mask_vectype_out)
2464 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2466 if (dump_enabled_p ())
2467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2468 "mask argument is not a boolean.\n");
2469 return false;
2472 if (TREE_CODE (mask) != SSA_NAME)
2474 if (dump_enabled_p ())
2475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2476 "mask argument is not an SSA name.\n");
2477 return false;
2480 enum vect_def_type mask_dt;
2481 tree mask_vectype;
2482 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2484 if (dump_enabled_p ())
2485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2486 "mask use not simple.\n");
2487 return false;
2490 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2491 if (!mask_vectype)
2492 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2494 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2496 if (dump_enabled_p ())
2497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2498 "could not find an appropriate vector mask type.\n");
2499 return false;
2502 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2503 TYPE_VECTOR_SUBPARTS (vectype)))
2505 if (dump_enabled_p ())
2506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2507 "vector mask type %T",
2508 " does not match vector data type %T.\n",
2509 mask_vectype, vectype);
2511 return false;
2514 *mask_dt_out = mask_dt;
2515 *mask_vectype_out = mask_vectype;
2516 return true;
2519 /* Return true if stored value RHS is suitable for vectorizing store
2520 statement STMT_INFO. When returning true, store the type of the
2521 definition in *RHS_DT_OUT, the type of the vectorized store value in
2522 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2524 static bool
2525 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2526 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2527 vec_load_store_type *vls_type_out)
 2529 /* If this is a store from a constant, make sure
 2530 native_encode_expr can handle it. */
2531 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2533 if (dump_enabled_p ())
2534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2535 "cannot encode constant as a byte sequence.\n");
2536 return false;
2539 enum vect_def_type rhs_dt;
2540 tree rhs_vectype;
2541 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 "use not simple.\n");
2546 return false;
2549 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2550 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2552 if (dump_enabled_p ())
2553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2554 "incompatible vector types.\n");
2555 return false;
2558 *rhs_dt_out = rhs_dt;
2559 *rhs_vectype_out = rhs_vectype;
2560 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2561 *vls_type_out = VLS_STORE_INVARIANT;
2562 else
2563 *vls_type_out = VLS_STORE;
2564 return true;
2567 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2568 Note that we support masks with floating-point type, in which case the
2569 floats are interpreted as a bitmask. */
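/* For an integer mask this is simply the constant -1 (all bits set); for a
   floating-point mask type the element is built from an all-ones bit
   pattern via real_from_target rather than from the value 1.0.  */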
2571 static tree
2572 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2574 if (TREE_CODE (masktype) == INTEGER_TYPE)
2575 return build_int_cst (masktype, -1);
2576 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2578 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2579 mask = build_vector_from_val (masktype, mask);
2580 return vect_init_vector (stmt_info, mask, masktype, NULL);
2582 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2584 REAL_VALUE_TYPE r;
2585 long tmp[6];
2586 for (int j = 0; j < 6; ++j)
2587 tmp[j] = -1;
2588 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2589 tree mask = build_real (TREE_TYPE (masktype), r);
2590 mask = build_vector_from_val (masktype, mask);
2591 return vect_init_vector (stmt_info, mask, masktype, NULL);
2593 gcc_unreachable ();
2596 /* Build an all-zero merge value of type VECTYPE while vectorizing
2597 STMT_INFO as a gather load. */
2599 static tree
2600 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2602 tree merge;
2603 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2604 merge = build_int_cst (TREE_TYPE (vectype), 0);
2605 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2607 REAL_VALUE_TYPE r;
2608 long tmp[6];
2609 for (int j = 0; j < 6; ++j)
2610 tmp[j] = 0;
2611 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2612 merge = build_real (TREE_TYPE (vectype), r);
2614 else
2615 gcc_unreachable ();
2616 merge = build_vector_from_val (vectype, merge);
2617 return vect_init_vector (stmt_info, merge, vectype, NULL);
2620 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2621 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2622 the gather load operation. If the load is conditional, MASK is the
2623 unvectorized condition and MASK_DT is its definition type, otherwise
2624 MASK is null. */
2626 static void
2627 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2628 gimple_stmt_iterator *gsi,
2629 stmt_vec_info *vec_stmt,
2630 gather_scatter_info *gs_info,
2631 tree mask)
2633 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2634 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2635 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2636 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2637 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2638 edge pe = loop_preheader_edge (loop);
2639 enum { NARROW, NONE, WIDEN } modifier;
2640 poly_uint64 gather_off_nunits
2641 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2643 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2644 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2645 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2646 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2647 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2648 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2649 tree scaletype = TREE_VALUE (arglist);
2650 tree real_masktype = masktype;
2651 gcc_checking_assert (types_compatible_p (srctype, rettype)
2652 && (!mask
2653 || TREE_CODE (masktype) == INTEGER_TYPE
2654 || types_compatible_p (srctype, masktype)));
2655 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2656 masktype = build_same_sized_truth_vector_type (srctype);
2658 tree perm_mask = NULL_TREE;
2659 tree mask_perm_mask = NULL_TREE;
2660 if (known_eq (nunits, gather_off_nunits))
2661 modifier = NONE;
2662 else if (known_eq (nunits * 2, gather_off_nunits))
2664 modifier = WIDEN;
2666 /* Currently widening gathers and scatters are only supported for
2667 fixed-length vectors. */
2668 int count = gather_off_nunits.to_constant ();
2669 vec_perm_builder sel (count, count, 1);
2670 for (int i = 0; i < count; ++i)
2671 sel.quick_push (i | (count / 2));
2673 vec_perm_indices indices (sel, 1, count);
2674 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2675 indices);
2677 else if (known_eq (nunits, gather_off_nunits * 2))
2679 modifier = NARROW;
2681 /* Currently narrowing gathers and scatters are only supported for
2682 fixed-length vectors. */
2683 int count = nunits.to_constant ();
2684 vec_perm_builder sel (count, count, 1);
2685 sel.quick_grow (count);
2686 for (int i = 0; i < count; ++i)
2687 sel[i] = i < count / 2 ? i : i + count / 2;
2688 vec_perm_indices indices (sel, 2, count);
2689 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2691 ncopies *= 2;
2693 if (mask)
2695 for (int i = 0; i < count; ++i)
2696 sel[i] = i | (count / 2);
2697 indices.new_vector (sel, 2, count);
2698 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2701 else
2702 gcc_unreachable ();
2704 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2705 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2707 tree ptr = fold_convert (ptrtype, gs_info->base);
2708 if (!is_gimple_min_invariant (ptr))
2710 gimple_seq seq;
2711 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2712 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2713 gcc_assert (!new_bb);
2716 tree scale = build_int_cst (scaletype, gs_info->scale);
2718 tree vec_oprnd0 = NULL_TREE;
2719 tree vec_mask = NULL_TREE;
2720 tree src_op = NULL_TREE;
2721 tree mask_op = NULL_TREE;
2722 tree prev_res = NULL_TREE;
2723 stmt_vec_info prev_stmt_info = NULL;
2725 if (!mask)
2727 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2728 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2731 for (int j = 0; j < ncopies; ++j)
2733 tree op, var;
2734 if (modifier == WIDEN && (j & 1))
2735 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2736 perm_mask, stmt_info, gsi);
2737 else if (j == 0)
2738 op = vec_oprnd0
2739 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2740 else
2741 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2742 vec_oprnd0);
2744 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2746 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2747 TYPE_VECTOR_SUBPARTS (idxtype)));
2748 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2749 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2750 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2751 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2752 op = var;
2755 if (mask)
2757 if (mask_perm_mask && (j & 1))
2758 mask_op = permute_vec_elements (mask_op, mask_op,
2759 mask_perm_mask, stmt_info, gsi);
2760 else
2762 if (j == 0)
2763 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2764 else
2765 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2766 vec_mask);
2768 mask_op = vec_mask;
2769 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2771 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2772 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2773 gcc_assert (known_eq (sub1, sub2));
2774 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2775 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2776 gassign *new_stmt
2777 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2778 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2779 mask_op = var;
2782 src_op = mask_op;
2785 tree mask_arg = mask_op;
2786 if (masktype != real_masktype)
2788 tree utype;
2789 if (TYPE_MODE (real_masktype) == TYPE_MODE (masktype))
2790 utype = real_masktype;
2791 else
2792 utype = lang_hooks.types.type_for_mode (TYPE_MODE (masktype), 1);
2793 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2794 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2795 gassign *new_stmt
2796 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2797 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2798 mask_arg = var;
2799 if (!useless_type_conversion_p (real_masktype, utype))
2801 gcc_assert (TYPE_PRECISION (utype)
2802 <= TYPE_PRECISION (real_masktype));
2803 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
 2804 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2805 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2806 mask_arg = var;
2808 src_op = build_zero_cst (srctype);
2810 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2811 mask_arg, scale);
2813 stmt_vec_info new_stmt_info;
2814 if (!useless_type_conversion_p (vectype, rettype))
2816 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2817 TYPE_VECTOR_SUBPARTS (rettype)));
2818 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2819 gimple_call_set_lhs (new_call, op);
2820 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2821 var = make_ssa_name (vec_dest);
2822 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2823 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2824 new_stmt_info
2825 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2827 else
2829 var = make_ssa_name (vec_dest, new_call);
2830 gimple_call_set_lhs (new_call, var);
2831 new_stmt_info
2832 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2835 if (modifier == NARROW)
2837 if ((j & 1) == 0)
2839 prev_res = var;
2840 continue;
2842 var = permute_vec_elements (prev_res, var, perm_mask,
2843 stmt_info, gsi);
2844 new_stmt_info = loop_vinfo->lookup_def (var);
2847 if (prev_stmt_info == NULL)
2848 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2849 else
2850 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2851 prev_stmt_info = new_stmt_info;
2855 /* Prepare the base and offset in GS_INFO for vectorization.
2856 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2857 to the vectorized offset argument for the first copy of STMT_INFO.
2858 STMT_INFO is the statement described by GS_INFO and LOOP is the
2859 containing loop. */
2861 static void
2862 vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
2863 gather_scatter_info *gs_info,
2864 tree *dataref_ptr, tree *vec_offset)
2866 gimple_seq stmts = NULL;
2867 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2868 if (stmts != NULL)
2870 basic_block new_bb;
2871 edge pe = loop_preheader_edge (loop);
2872 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2873 gcc_assert (!new_bb);
2875 tree offset_type = TREE_TYPE (gs_info->offset);
2876 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2877 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2878 offset_vectype);
2881 /* Prepare to implement a grouped or strided load or store using
2882 the gather load or scatter store operation described by GS_INFO.
2883 STMT_INFO is the load or store statement.
2885 Set *DATAREF_BUMP to the amount that should be added to the base
2886 address after each copy of the vectorized statement. Set *VEC_OFFSET
2887 to an invariant offset vector in which element I has the value
2888 I * DR_STEP / SCALE. */
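/* As an illustration, assuming DR_STEP == 8, SCALE == 4 and a 4-element
   vector type: *DATAREF_BUMP is 8 * 4 == 32 bytes per copy and *VEC_OFFSET
   is the series { 0, 2, 4, 6 }, built with VEC_SERIES_EXPR below.  */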
2890 static void
2891 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2892 loop_vec_info loop_vinfo,
2893 gather_scatter_info *gs_info,
2894 tree *dataref_bump, tree *vec_offset)
2896 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2897 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2898 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2899 gimple_seq stmts;
2901 tree bump = size_binop (MULT_EXPR,
2902 fold_convert (sizetype, DR_STEP (dr)),
2903 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2904 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2905 if (stmts)
2906 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2908 /* The offset given in GS_INFO can have pointer type, so use the element
2909 type of the vector instead. */
2910 tree offset_type = TREE_TYPE (gs_info->offset);
2911 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2912 offset_type = TREE_TYPE (offset_vectype);
2914 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2915 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2916 ssize_int (gs_info->scale));
2917 step = fold_convert (offset_type, step);
2918 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2920 /* Create {0, X, X*2, X*3, ...}. */
2921 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2922 build_zero_cst (offset_type), step);
2923 if (stmts)
2924 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2927 /* Return the amount that should be added to a vector pointer to move
2928 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2929 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2930 vectorization. */
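/* For VMAT_INVARIANT the increment is zero; otherwise it is the size of
   AGGR_TYPE, negated when the data reference has a negative step so that
   the pointer moves backwards through memory.  */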
2932 static tree
2933 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
2934 vect_memory_access_type memory_access_type)
2936 if (memory_access_type == VMAT_INVARIANT)
2937 return size_zero_node;
2939 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2940 tree step = vect_dr_behavior (dr_info)->step;
2941 if (tree_int_cst_sgn (step) == -1)
2942 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2943 return iv_step;
2946 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2948 static bool
2949 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2950 stmt_vec_info *vec_stmt, slp_tree slp_node,
2951 tree vectype_in, stmt_vector_for_cost *cost_vec)
2953 tree op, vectype;
2954 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2955 vec_info *vinfo = stmt_info->vinfo;
2956 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2957 unsigned ncopies;
2959 op = gimple_call_arg (stmt, 0);
2960 vectype = STMT_VINFO_VECTYPE (stmt_info);
2961 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2963 /* Multiple types in SLP are handled by creating the appropriate number of
2964 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2965 case of SLP. */
2966 if (slp_node)
2967 ncopies = 1;
2968 else
2969 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2971 gcc_assert (ncopies >= 1);
2973 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2974 if (! char_vectype)
2975 return false;
2977 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2978 unsigned word_bytes;
2979 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2980 return false;
2982 /* The encoding uses one stepped pattern for each byte in the word. */
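/* For example, a 32-bit bswap on a 16-byte vector (WORD_BYTES == 4) needs
   the selector { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
   i.e. the bytes are reversed within each word.  */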
2983 vec_perm_builder elts (num_bytes, word_bytes, 3);
2984 for (unsigned i = 0; i < 3; ++i)
2985 for (unsigned j = 0; j < word_bytes; ++j)
2986 elts.quick_push ((i + 1) * word_bytes - j - 1);
2988 vec_perm_indices indices (elts, 1, num_bytes);
2989 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2990 return false;
2992 if (! vec_stmt)
2994 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2995 DUMP_VECT_SCOPE ("vectorizable_bswap");
2996 if (! slp_node)
2998 record_stmt_cost (cost_vec,
2999 1, vector_stmt, stmt_info, 0, vect_prologue);
3000 record_stmt_cost (cost_vec,
3001 ncopies, vec_perm, stmt_info, 0, vect_body);
3003 return true;
3006 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3008 /* Transform. */
3009 vec<tree> vec_oprnds = vNULL;
3010 stmt_vec_info new_stmt_info = NULL;
3011 stmt_vec_info prev_stmt_info = NULL;
3012 for (unsigned j = 0; j < ncopies; j++)
3014 /* Handle uses. */
3015 if (j == 0)
3016 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3017 else
3018 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
 3020 /* Arguments are ready. Create the new vector stmt. */
3021 unsigned i;
3022 tree vop;
3023 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3025 gimple *new_stmt;
3026 tree tem = make_ssa_name (char_vectype);
3027 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3028 char_vectype, vop));
3029 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3030 tree tem2 = make_ssa_name (char_vectype);
3031 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3032 tem, tem, bswap_vconst);
3033 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3034 tem = make_ssa_name (vectype);
3035 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3036 vectype, tem2));
3037 new_stmt_info
3038 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3039 if (slp_node)
3040 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3043 if (slp_node)
3044 continue;
3046 if (j == 0)
3047 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3048 else
3049 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3051 prev_stmt_info = new_stmt_info;
3054 vec_oprnds.release ();
3055 return true;
3058 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3059 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3060 in a single step. On success, store the binary pack code in
3061 *CONVERT_CODE. */
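/* For example, narrowing from a vector of 32-bit integers to a vector of
   16-bit integers in one step would use VEC_PACK_TRUNC_EXPR; conversions
   that need more than one step (MULTI_STEP_CVT != 0) are rejected.  */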
3063 static bool
3064 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3065 tree_code *convert_code)
3067 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3068 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3069 return false;
3071 tree_code code;
3072 int multi_step_cvt = 0;
3073 auto_vec <tree, 8> interm_types;
3074 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3075 &code, &multi_step_cvt,
3076 &interm_types)
3077 || multi_step_cvt)
3078 return false;
3080 *convert_code = code;
3081 return true;
3084 /* Function vectorizable_call.
3086 Check if STMT_INFO performs a function call that can be vectorized.
3087 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3088 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3089 Return true if STMT_INFO is vectorizable in this way. */
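/* Vectorization is attempted first through a directly-mapped internal
   function (see vectorizable_internal_function), then through the
   targetm.vectorize.builtin_vectorized_function and
   builtin_md_vectorized_function hooks; IFN_GOMP_SIMD_LANE and the
   BUILT_IN_BSWAP* builtins are handled as special cases.  */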
3091 static bool
3092 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3093 stmt_vec_info *vec_stmt, slp_tree slp_node,
3094 stmt_vector_for_cost *cost_vec)
3096 gcall *stmt;
3097 tree vec_dest;
3098 tree scalar_dest;
3099 tree op;
3100 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3101 stmt_vec_info prev_stmt_info;
3102 tree vectype_out, vectype_in;
3103 poly_uint64 nunits_in;
3104 poly_uint64 nunits_out;
3105 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3106 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3107 vec_info *vinfo = stmt_info->vinfo;
3108 tree fndecl, new_temp, rhs_type;
3109 enum vect_def_type dt[4]
3110 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3111 vect_unknown_def_type };
3112 int ndts = ARRAY_SIZE (dt);
3113 int ncopies, j;
3114 auto_vec<tree, 8> vargs;
3115 auto_vec<tree, 8> orig_vargs;
3116 enum { NARROW, NONE, WIDEN } modifier;
3117 size_t i, nargs;
3118 tree lhs;
3120 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3121 return false;
3123 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3124 && ! vec_stmt)
3125 return false;
3127 /* Is STMT_INFO a vectorizable call? */
3128 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3129 if (!stmt)
3130 return false;
3132 if (gimple_call_internal_p (stmt)
3133 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3134 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3135 /* Handled by vectorizable_load and vectorizable_store. */
3136 return false;
3138 if (gimple_call_lhs (stmt) == NULL_TREE
3139 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3140 return false;
3142 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3144 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3146 /* Process function arguments. */
3147 rhs_type = NULL_TREE;
3148 vectype_in = NULL_TREE;
3149 nargs = gimple_call_num_args (stmt);
 3151 /* Bail out if the function has more than four arguments; we do not have
 3152 interesting builtin functions to vectorize with more than two arguments
 3153 except for fma. No arguments is also not good. */
3154 if (nargs == 0 || nargs > 4)
3155 return false;
3157 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3158 combined_fn cfn = gimple_call_combined_fn (stmt);
3159 if (cfn == CFN_GOMP_SIMD_LANE)
3161 nargs = 0;
3162 rhs_type = unsigned_type_node;
3165 int mask_opno = -1;
3166 if (internal_fn_p (cfn))
3167 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3169 for (i = 0; i < nargs; i++)
3171 tree opvectype;
3173 op = gimple_call_arg (stmt, i);
3174 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
3176 if (dump_enabled_p ())
3177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3178 "use not simple.\n");
3179 return false;
3182 /* Skip the mask argument to an internal function. This operand
3183 has been converted via a pattern if necessary. */
3184 if ((int) i == mask_opno)
3185 continue;
3187 /* We can only handle calls with arguments of the same type. */
3188 if (rhs_type
3189 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3191 if (dump_enabled_p ())
3192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3193 "argument types differ.\n");
3194 return false;
3196 if (!rhs_type)
3197 rhs_type = TREE_TYPE (op);
3199 if (!vectype_in)
3200 vectype_in = opvectype;
3201 else if (opvectype
3202 && opvectype != vectype_in)
3204 if (dump_enabled_p ())
3205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3206 "argument vector types differ.\n");
3207 return false;
3210 /* If all arguments are external or constant defs use a vector type with
3211 the same size as the output vector type. */
3212 if (!vectype_in)
3213 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3214 if (vec_stmt)
3215 gcc_assert (vectype_in);
3216 if (!vectype_in)
3218 if (dump_enabled_p ())
3219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3220 "no vectype for scalar type %T\n", rhs_type);
3222 return false;
3225 /* FORNOW */
3226 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3227 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3228 if (known_eq (nunits_in * 2, nunits_out))
3229 modifier = NARROW;
3230 else if (known_eq (nunits_out, nunits_in))
3231 modifier = NONE;
3232 else if (known_eq (nunits_out * 2, nunits_in))
3233 modifier = WIDEN;
3234 else
3235 return false;
3237 /* We only handle functions that do not read or clobber memory. */
3238 if (gimple_vuse (stmt))
3240 if (dump_enabled_p ())
3241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3242 "function reads from or writes to memory.\n");
3243 return false;
3246 /* For now, we only vectorize functions if a target specific builtin
3247 is available. TODO -- in some cases, it might be profitable to
3248 insert the calls for pieces of the vector, in order to be able
3249 to vectorize other operations in the loop. */
3250 fndecl = NULL_TREE;
3251 internal_fn ifn = IFN_LAST;
3252 tree callee = gimple_call_fndecl (stmt);
3254 /* First try using an internal function. */
3255 tree_code convert_code = ERROR_MARK;
3256 if (cfn != CFN_LAST
3257 && (modifier == NONE
3258 || (modifier == NARROW
3259 && simple_integer_narrowing (vectype_out, vectype_in,
3260 &convert_code))))
3261 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3262 vectype_in);
3264 /* If that fails, try asking for a target-specific built-in function. */
3265 if (ifn == IFN_LAST)
3267 if (cfn != CFN_LAST)
3268 fndecl = targetm.vectorize.builtin_vectorized_function
3269 (cfn, vectype_out, vectype_in);
3270 else if (callee)
3271 fndecl = targetm.vectorize.builtin_md_vectorized_function
3272 (callee, vectype_out, vectype_in);
3275 if (ifn == IFN_LAST && !fndecl)
3277 if (cfn == CFN_GOMP_SIMD_LANE
3278 && !slp_node
3279 && loop_vinfo
3280 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3281 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3282 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3283 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3285 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3286 { 0, 1, 2, ... vf - 1 } vector. */
3287 gcc_assert (nargs == 0);
3289 else if (modifier == NONE
3290 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3291 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3292 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3293 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3294 vectype_in, cost_vec);
3295 else
3297 if (dump_enabled_p ())
3298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3299 "function is not vectorizable.\n");
3300 return false;
3304 if (slp_node)
3305 ncopies = 1;
3306 else if (modifier == NARROW && ifn == IFN_LAST)
3307 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3308 else
3309 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3311 /* Sanity check: make sure that at least one copy of the vectorized stmt
3312 needs to be generated. */
3313 gcc_assert (ncopies >= 1);
3315 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3316 if (!vec_stmt) /* transformation not required. */
3318 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3319 DUMP_VECT_SCOPE ("vectorizable_call");
3320 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3321 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3322 record_stmt_cost (cost_vec, ncopies / 2,
3323 vec_promote_demote, stmt_info, 0, vect_body);
3325 if (loop_vinfo && mask_opno >= 0)
3327 unsigned int nvectors = (slp_node
3328 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3329 : ncopies);
3330 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3332 return true;
3335 /* Transform. */
3337 if (dump_enabled_p ())
3338 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3340 /* Handle def. */
3341 scalar_dest = gimple_call_lhs (stmt);
3342 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3344 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3346 stmt_vec_info new_stmt_info = NULL;
3347 prev_stmt_info = NULL;
3348 if (modifier == NONE || ifn != IFN_LAST)
3350 tree prev_res = NULL_TREE;
3351 vargs.safe_grow (nargs);
3352 orig_vargs.safe_grow (nargs);
3353 for (j = 0; j < ncopies; ++j)
3355 /* Build argument list for the vectorized call. */
3356 if (slp_node)
3358 auto_vec<vec<tree> > vec_defs (nargs);
3359 vec<tree> vec_oprnds0;
3361 for (i = 0; i < nargs; i++)
3362 vargs[i] = gimple_call_arg (stmt, i);
3363 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3364 vec_oprnds0 = vec_defs[0];
3366 /* Arguments are ready. Create the new vector stmt. */
3367 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3369 size_t k;
3370 for (k = 0; k < nargs; k++)
3372 vec<tree> vec_oprndsk = vec_defs[k];
3373 vargs[k] = vec_oprndsk[i];
3375 if (modifier == NARROW)
3377 /* We don't define any narrowing conditional functions
3378 at present. */
3379 gcc_assert (mask_opno < 0);
3380 tree half_res = make_ssa_name (vectype_in);
3381 gcall *call
3382 = gimple_build_call_internal_vec (ifn, vargs);
3383 gimple_call_set_lhs (call, half_res);
3384 gimple_call_set_nothrow (call, true);
3385 new_stmt_info
3386 = vect_finish_stmt_generation (stmt_info, call, gsi);
3387 if ((i & 1) == 0)
3389 prev_res = half_res;
3390 continue;
3392 new_temp = make_ssa_name (vec_dest);
3393 gimple *new_stmt
3394 = gimple_build_assign (new_temp, convert_code,
3395 prev_res, half_res);
3396 new_stmt_info
3397 = vect_finish_stmt_generation (stmt_info, new_stmt,
3398 gsi);
3400 else
3402 if (mask_opno >= 0 && masked_loop_p)
3404 unsigned int vec_num = vec_oprnds0.length ();
3405 /* Always true for SLP. */
3406 gcc_assert (ncopies == 1);
3407 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3408 vectype_out, i);
3409 vargs[mask_opno] = prepare_load_store_mask
3410 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3413 gcall *call;
3414 if (ifn != IFN_LAST)
3415 call = gimple_build_call_internal_vec (ifn, vargs);
3416 else
3417 call = gimple_build_call_vec (fndecl, vargs);
3418 new_temp = make_ssa_name (vec_dest, call);
3419 gimple_call_set_lhs (call, new_temp);
3420 gimple_call_set_nothrow (call, true);
3421 new_stmt_info
3422 = vect_finish_stmt_generation (stmt_info, call, gsi);
3424 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3427 for (i = 0; i < nargs; i++)
3429 vec<tree> vec_oprndsi = vec_defs[i];
3430 vec_oprndsi.release ();
3432 continue;
3435 for (i = 0; i < nargs; i++)
3437 op = gimple_call_arg (stmt, i);
3438 if (j == 0)
3439 vec_oprnd0
3440 = vect_get_vec_def_for_operand (op, stmt_info);
3441 else
3442 vec_oprnd0
3443 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3445 orig_vargs[i] = vargs[i] = vec_oprnd0;
3448 if (mask_opno >= 0 && masked_loop_p)
3450 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3451 vectype_out, j);
3452 vargs[mask_opno]
3453 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3454 vargs[mask_opno], gsi);
3457 if (cfn == CFN_GOMP_SIMD_LANE)
3459 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3460 tree new_var
3461 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3462 gimple *init_stmt = gimple_build_assign (new_var, cst);
3463 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3464 new_temp = make_ssa_name (vec_dest);
3465 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3466 new_stmt_info
3467 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3469 else if (modifier == NARROW)
3471 /* We don't define any narrowing conditional functions at
3472 present. */
3473 gcc_assert (mask_opno < 0);
3474 tree half_res = make_ssa_name (vectype_in);
3475 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3476 gimple_call_set_lhs (call, half_res);
3477 gimple_call_set_nothrow (call, true);
3478 new_stmt_info
3479 = vect_finish_stmt_generation (stmt_info, call, gsi);
3480 if ((j & 1) == 0)
3482 prev_res = half_res;
3483 continue;
3485 new_temp = make_ssa_name (vec_dest);
3486 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3487 prev_res, half_res);
3488 new_stmt_info
3489 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3491 else
3493 gcall *call;
3494 if (ifn != IFN_LAST)
3495 call = gimple_build_call_internal_vec (ifn, vargs);
3496 else
3497 call = gimple_build_call_vec (fndecl, vargs);
3498 new_temp = make_ssa_name (vec_dest, call);
3499 gimple_call_set_lhs (call, new_temp);
3500 gimple_call_set_nothrow (call, true);
3501 new_stmt_info
3502 = vect_finish_stmt_generation (stmt_info, call, gsi);
3505 if (j == (modifier == NARROW ? 1 : 0))
3506 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3507 else
3508 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3510 prev_stmt_info = new_stmt_info;
3513 else if (modifier == NARROW)
3515 /* We don't define any narrowing conditional functions at present. */
3516 gcc_assert (mask_opno < 0);
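/* In this branch the target builtin takes two input vectors per scalar
   argument and produces a single narrowed output vector, so the argument
   list built below has 2 * NARGS entries.  */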
3517 for (j = 0; j < ncopies; ++j)
3519 /* Build argument list for the vectorized call. */
3520 if (j == 0)
3521 vargs.create (nargs * 2);
3522 else
3523 vargs.truncate (0);
3525 if (slp_node)
3527 auto_vec<vec<tree> > vec_defs (nargs);
3528 vec<tree> vec_oprnds0;
3530 for (i = 0; i < nargs; i++)
3531 vargs.quick_push (gimple_call_arg (stmt, i));
3532 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3533 vec_oprnds0 = vec_defs[0];
3535 /* Arguments are ready. Create the new vector stmt. */
3536 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3538 size_t k;
3539 vargs.truncate (0);
3540 for (k = 0; k < nargs; k++)
3542 vec<tree> vec_oprndsk = vec_defs[k];
3543 vargs.quick_push (vec_oprndsk[i]);
3544 vargs.quick_push (vec_oprndsk[i + 1]);
3546 gcall *call;
3547 if (ifn != IFN_LAST)
3548 call = gimple_build_call_internal_vec (ifn, vargs);
3549 else
3550 call = gimple_build_call_vec (fndecl, vargs);
3551 new_temp = make_ssa_name (vec_dest, call);
3552 gimple_call_set_lhs (call, new_temp);
3553 gimple_call_set_nothrow (call, true);
3554 new_stmt_info
3555 = vect_finish_stmt_generation (stmt_info, call, gsi);
3556 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3559 for (i = 0; i < nargs; i++)
3561 vec<tree> vec_oprndsi = vec_defs[i];
3562 vec_oprndsi.release ();
3564 continue;
3567 for (i = 0; i < nargs; i++)
3569 op = gimple_call_arg (stmt, i);
3570 if (j == 0)
3572 vec_oprnd0
3573 = vect_get_vec_def_for_operand (op, stmt_info);
3574 vec_oprnd1
3575 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3577 else
3579 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3580 2 * i + 1);
3581 vec_oprnd0
3582 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3583 vec_oprnd1
3584 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3587 vargs.quick_push (vec_oprnd0);
3588 vargs.quick_push (vec_oprnd1);
3591 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3592 new_temp = make_ssa_name (vec_dest, new_stmt);
3593 gimple_call_set_lhs (new_stmt, new_temp);
3594 new_stmt_info
3595 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3597 if (j == 0)
3598 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3599 else
3600 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3602 prev_stmt_info = new_stmt_info;
3605 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3607 else
3608 /* No current target implements this case. */
3609 return false;
3611 vargs.release ();
3613 /* The call in STMT might prevent it from being removed in DCE.
3614 We cannot, however, remove it here, due to the way the SSA name
3615 it defines is mapped to the new definition. So just replace the
3616 rhs of the statement with something harmless. */

3618 if (slp_node)
3619 return true;
3621 stmt_info = vect_orig_stmt (stmt_info);
3622 lhs = gimple_get_lhs (stmt_info->stmt);
3624 gassign *new_stmt
3625 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3626 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3628 return true;
3632 struct simd_call_arg_info
3634 tree vectype;
3635 tree op;
3636 HOST_WIDE_INT linear_step;
3637 enum vect_def_type dt;
3638 unsigned int align;
3639 bool simd_lane_linear;
3642 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3643 is linear within a simd lane (but not within the whole loop), note it in
3644 *ARGINFO. */
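/* A hypothetical illustration (not taken from the testsuite): for
     _1 = GOMP_SIMD_LANE (simduid.0);
     _2 = (sizetype) _1;
     _3 = _2 * 4;
     op_4 = &base + _3;
   OP_4 would be recorded as linear within the simd lane with base &base
   and step 4.  */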
3646 static void
3647 vect_simd_lane_linear (tree op, struct loop *loop,
3648 struct simd_call_arg_info *arginfo)
3650 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3652 if (!is_gimple_assign (def_stmt)
3653 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3654 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3655 return;
3657 tree base = gimple_assign_rhs1 (def_stmt);
3658 HOST_WIDE_INT linear_step = 0;
3659 tree v = gimple_assign_rhs2 (def_stmt);
3660 while (TREE_CODE (v) == SSA_NAME)
3662 tree t;
3663 def_stmt = SSA_NAME_DEF_STMT (v);
3664 if (is_gimple_assign (def_stmt))
3665 switch (gimple_assign_rhs_code (def_stmt))
3667 case PLUS_EXPR:
3668 t = gimple_assign_rhs2 (def_stmt);
3669 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3670 return;
3671 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3672 v = gimple_assign_rhs1 (def_stmt);
3673 continue;
3674 case MULT_EXPR:
3675 t = gimple_assign_rhs2 (def_stmt);
3676 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3677 return;
3678 linear_step = tree_to_shwi (t);
3679 v = gimple_assign_rhs1 (def_stmt);
3680 continue;
3681 CASE_CONVERT:
3682 t = gimple_assign_rhs1 (def_stmt);
3683 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3684 || (TYPE_PRECISION (TREE_TYPE (v))
3685 < TYPE_PRECISION (TREE_TYPE (t))))
3686 return;
3687 if (!linear_step)
3688 linear_step = 1;
3689 v = t;
3690 continue;
3691 default:
3692 return;
3694 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3695 && loop->simduid
3696 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3697 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3698 == loop->simduid))
3700 if (!linear_step)
3701 linear_step = 1;
3702 arginfo->linear_step = linear_step;
3703 arginfo->op = base;
3704 arginfo->simd_lane_linear = true;
3705 return;
3710 /* Return the number of elements in vector type VECTYPE, which is associated
3711 with a SIMD clone. At present these vectors always have a constant
3712 length. */
3714 static unsigned HOST_WIDE_INT
3715 simd_clone_subparts (tree vectype)
3717 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3720 /* Function vectorizable_simd_clone_call.
3722 Check if STMT_INFO performs a function call that can be vectorized
3723 by calling a simd clone of the function.
3724 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3725 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3726 Return true if STMT_INFO is vectorizable in this way. */
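/* For instance (illustrative only), with "#pragma omp declare simd" on FOO
   and a vectorization factor of 4, a scalar call
     x_1 = foo (a_2);
   can be replaced by a single call to the simdlen-4 clone of FOO whose
   vector argument and return value each hold four lanes.  */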
3728 static bool
3729 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3730 gimple_stmt_iterator *gsi,
3731 stmt_vec_info *vec_stmt, slp_tree slp_node,
3732 stmt_vector_for_cost *)
3734 tree vec_dest;
3735 tree scalar_dest;
3736 tree op, type;
3737 tree vec_oprnd0 = NULL_TREE;
3738 stmt_vec_info prev_stmt_info;
3739 tree vectype;
3740 unsigned int nunits;
3741 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3742 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3743 vec_info *vinfo = stmt_info->vinfo;
3744 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3745 tree fndecl, new_temp;
3746 int ncopies, j;
3747 auto_vec<simd_call_arg_info> arginfo;
3748 vec<tree> vargs = vNULL;
3749 size_t i, nargs;
3750 tree lhs, rtype, ratype;
3751 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3753 /* Is STMT a vectorizable call? */
3754 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3755 if (!stmt)
3756 return false;
3758 fndecl = gimple_call_fndecl (stmt);
3759 if (fndecl == NULL_TREE)
3760 return false;
3762 struct cgraph_node *node = cgraph_node::get (fndecl);
3763 if (node == NULL || node->simd_clones == NULL)
3764 return false;
3766 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3767 return false;
3769 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3770 && ! vec_stmt)
3771 return false;
3773 if (gimple_call_lhs (stmt)
3774 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3775 return false;
3777 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3779 vectype = STMT_VINFO_VECTYPE (stmt_info);
3781 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3782 return false;
3784 /* FORNOW */
3785 if (slp_node)
3786 return false;
3788 /* Process function arguments. */
3789 nargs = gimple_call_num_args (stmt);
3791 /* Bail out if the function has zero arguments. */
3792 if (nargs == 0)
3793 return false;
3795 arginfo.reserve (nargs, true);
3797 for (i = 0; i < nargs; i++)
3799 simd_call_arg_info thisarginfo;
3800 affine_iv iv;
3802 thisarginfo.linear_step = 0;
3803 thisarginfo.align = 0;
3804 thisarginfo.op = NULL_TREE;
3805 thisarginfo.simd_lane_linear = false;
3807 op = gimple_call_arg (stmt, i);
3808 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3809 &thisarginfo.vectype)
3810 || thisarginfo.dt == vect_uninitialized_def)
3812 if (dump_enabled_p ())
3813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3814 "use not simple.\n");
3815 return false;
3818 if (thisarginfo.dt == vect_constant_def
3819 || thisarginfo.dt == vect_external_def)
3820 gcc_assert (thisarginfo.vectype == NULL_TREE);
3821 else
3822 gcc_assert (thisarginfo.vectype != NULL_TREE);
3824 /* For linear arguments, the analysis phase should have saved
3825 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3826 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3827 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3829 gcc_assert (vec_stmt);
3830 thisarginfo.linear_step
3831 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3832 thisarginfo.op
3833 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3834 thisarginfo.simd_lane_linear
3835 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3836 == boolean_true_node);
3837 /* If the loop has been peeled for alignment, we need to adjust it. */
3838 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3839 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3840 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3842 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3843 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3844 tree opt = TREE_TYPE (thisarginfo.op);
3845 bias = fold_convert (TREE_TYPE (step), bias);
3846 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3847 thisarginfo.op
3848 = fold_build2 (POINTER_TYPE_P (opt)
3849 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3850 thisarginfo.op, bias);
3853 else if (!vec_stmt
3854 && thisarginfo.dt != vect_constant_def
3855 && thisarginfo.dt != vect_external_def
3856 && loop_vinfo
3857 && TREE_CODE (op) == SSA_NAME
3858 && simple_iv (loop, loop_containing_stmt (stmt), op,
3859 &iv, false)
3860 && tree_fits_shwi_p (iv.step))
3862 thisarginfo.linear_step = tree_to_shwi (iv.step);
3863 thisarginfo.op = iv.base;
3865 else if ((thisarginfo.dt == vect_constant_def
3866 || thisarginfo.dt == vect_external_def)
3867 && POINTER_TYPE_P (TREE_TYPE (op)))
3868 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3869 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3870 linear too. */
3871 if (POINTER_TYPE_P (TREE_TYPE (op))
3872 && !thisarginfo.linear_step
3873 && !vec_stmt
3874 && thisarginfo.dt != vect_constant_def
3875 && thisarginfo.dt != vect_external_def
3876 && loop_vinfo
3877 && !slp_node
3878 && TREE_CODE (op) == SSA_NAME)
3879 vect_simd_lane_linear (op, loop, &thisarginfo);
3881 arginfo.quick_push (thisarginfo);
3884 unsigned HOST_WIDE_INT vf;
3885 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3887 if (dump_enabled_p ())
3888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3889 "not considering SIMD clones; not yet supported"
3890 " for variable-width vectors.\n");
3891 return false;
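/* Choose the "best" simd clone: among the usable clones prefer one whose
   simdlen matches the vectorization factor, that is not inbranch, and
   whose argument kinds and alignment fit what the analysis above recorded
   in ARGINFO.  */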
3894 unsigned int badness = 0;
3895 struct cgraph_node *bestn = NULL;
3896 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3897 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3898 else
3899 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3900 n = n->simdclone->next_clone)
3902 unsigned int this_badness = 0;
3903 if (n->simdclone->simdlen > vf
3904 || n->simdclone->nargs != nargs)
3905 continue;
3906 if (n->simdclone->simdlen < vf)
3907 this_badness += (exact_log2 (vf)
3908 - exact_log2 (n->simdclone->simdlen)) * 1024;
3909 if (n->simdclone->inbranch)
3910 this_badness += 2048;
3911 int target_badness = targetm.simd_clone.usable (n);
3912 if (target_badness < 0)
3913 continue;
3914 this_badness += target_badness * 512;
3915 /* FORNOW: Have to add code to add the mask argument. */
3916 if (n->simdclone->inbranch)
3917 continue;
3918 for (i = 0; i < nargs; i++)
3920 switch (n->simdclone->args[i].arg_type)
3922 case SIMD_CLONE_ARG_TYPE_VECTOR:
3923 if (!useless_type_conversion_p
3924 (n->simdclone->args[i].orig_type,
3925 TREE_TYPE (gimple_call_arg (stmt, i))))
3926 i = -1;
3927 else if (arginfo[i].dt == vect_constant_def
3928 || arginfo[i].dt == vect_external_def
3929 || arginfo[i].linear_step)
3930 this_badness += 64;
3931 break;
3932 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3933 if (arginfo[i].dt != vect_constant_def
3934 && arginfo[i].dt != vect_external_def)
3935 i = -1;
3936 break;
3937 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3938 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3939 if (arginfo[i].dt == vect_constant_def
3940 || arginfo[i].dt == vect_external_def
3941 || (arginfo[i].linear_step
3942 != n->simdclone->args[i].linear_step))
3943 i = -1;
3944 break;
3945 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3946 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3947 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3948 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3949 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3950 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3951 /* FORNOW */
3952 i = -1;
3953 break;
3954 case SIMD_CLONE_ARG_TYPE_MASK:
3955 gcc_unreachable ();
3957 if (i == (size_t) -1)
3958 break;
3959 if (n->simdclone->args[i].alignment > arginfo[i].align)
3961 i = -1;
3962 break;
3964 if (arginfo[i].align)
3965 this_badness += (exact_log2 (arginfo[i].align)
3966 - exact_log2 (n->simdclone->args[i].alignment));
3968 if (i == (size_t) -1)
3969 continue;
3970 if (bestn == NULL || this_badness < badness)
3972 bestn = n;
3973 badness = this_badness;
3977 if (bestn == NULL)
3978 return false;
3980 for (i = 0; i < nargs; i++)
3981 if ((arginfo[i].dt == vect_constant_def
3982 || arginfo[i].dt == vect_external_def)
3983 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3985 arginfo[i].vectype
3986 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3987 i)));
3988 if (arginfo[i].vectype == NULL
3989 || (simd_clone_subparts (arginfo[i].vectype)
3990 > bestn->simdclone->simdlen))
3991 return false;
3994 fndecl = bestn->decl;
3995 nunits = bestn->simdclone->simdlen;
3996 ncopies = vf / nunits;
3998 /* If the function isn't const, only allow it in simd loops where the user
3999 has asserted that at least nunits consecutive iterations can be
4000 performed using SIMD instructions. */
4001 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4002 && gimple_vuse (stmt))
4003 return false;
4005 /* Sanity check: make sure that at least one copy of the vectorized stmt
4006 needs to be generated. */
4007 gcc_assert (ncopies >= 1);
4009 if (!vec_stmt) /* transformation not required. */
4011 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4012 for (i = 0; i < nargs; i++)
4013 if ((bestn->simdclone->args[i].arg_type
4014 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4015 || (bestn->simdclone->args[i].arg_type
4016 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4018 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4019 + 1);
4020 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4021 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4022 ? size_type_node : TREE_TYPE (arginfo[i].op);
4023 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4024 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4025 tree sll = arginfo[i].simd_lane_linear
4026 ? boolean_true_node : boolean_false_node;
4027 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4029 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4030 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4031 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4032 return true;
4035 /* Transform. */
4037 if (dump_enabled_p ())
4038 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4040 /* Handle def. */
4041 scalar_dest = gimple_call_lhs (stmt);
4042 vec_dest = NULL_TREE;
4043 rtype = NULL_TREE;
4044 ratype = NULL_TREE;
4045 if (scalar_dest)
4047 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4048 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4049 if (TREE_CODE (rtype) == ARRAY_TYPE)
4051 ratype = rtype;
4052 rtype = TREE_TYPE (ratype);
4056 prev_stmt_info = NULL;
4057 for (j = 0; j < ncopies; ++j)
4059 /* Build argument list for the vectorized call. */
4060 if (j == 0)
4061 vargs.create (nargs);
4062 else
4063 vargs.truncate (0);
4065 for (i = 0; i < nargs; i++)
4067 unsigned int k, l, m, o;
4068 tree atype;
4069 op = gimple_call_arg (stmt, i);
4070 switch (bestn->simdclone->args[i].arg_type)
4072 case SIMD_CLONE_ARG_TYPE_VECTOR:
4073 atype = bestn->simdclone->args[i].vector_type;
4074 o = nunits / simd_clone_subparts (atype);
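/* The clone may take its vector arguments in a different vector type than
   the loop's: if ATYPE has fewer elements than the loop vector, pieces of
   the loop vector are extracted with BIT_FIELD_REFs; if it has more,
   several loop vectors are combined into one argument with a
   CONSTRUCTOR.  */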
4075 for (m = j * o; m < (j + 1) * o; m++)
4077 if (simd_clone_subparts (atype)
4078 < simd_clone_subparts (arginfo[i].vectype))
4080 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4081 k = (simd_clone_subparts (arginfo[i].vectype)
4082 / simd_clone_subparts (atype));
4083 gcc_assert ((k & (k - 1)) == 0);
4084 if (m == 0)
4085 vec_oprnd0
4086 = vect_get_vec_def_for_operand (op, stmt_info);
4087 else
4089 vec_oprnd0 = arginfo[i].op;
4090 if ((m & (k - 1)) == 0)
4091 vec_oprnd0
4092 = vect_get_vec_def_for_stmt_copy (vinfo,
4093 vec_oprnd0);
4095 arginfo[i].op = vec_oprnd0;
4096 vec_oprnd0
4097 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4098 bitsize_int (prec),
4099 bitsize_int ((m & (k - 1)) * prec));
4100 gassign *new_stmt
4101 = gimple_build_assign (make_ssa_name (atype),
4102 vec_oprnd0);
4103 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4104 vargs.safe_push (gimple_assign_lhs (new_stmt));
4106 else
4108 k = (simd_clone_subparts (atype)
4109 / simd_clone_subparts (arginfo[i].vectype));
4110 gcc_assert ((k & (k - 1)) == 0);
4111 vec<constructor_elt, va_gc> *ctor_elts;
4112 if (k != 1)
4113 vec_alloc (ctor_elts, k);
4114 else
4115 ctor_elts = NULL;
4116 for (l = 0; l < k; l++)
4118 if (m == 0 && l == 0)
4119 vec_oprnd0
4120 = vect_get_vec_def_for_operand (op, stmt_info);
4121 else
4122 vec_oprnd0
4123 = vect_get_vec_def_for_stmt_copy (vinfo,
4124 arginfo[i].op);
4125 arginfo[i].op = vec_oprnd0;
4126 if (k == 1)
4127 break;
4128 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4129 vec_oprnd0);
4131 if (k == 1)
4132 vargs.safe_push (vec_oprnd0);
4133 else
4135 vec_oprnd0 = build_constructor (atype, ctor_elts);
4136 gassign *new_stmt
4137 = gimple_build_assign (make_ssa_name (atype),
4138 vec_oprnd0);
4139 vect_finish_stmt_generation (stmt_info, new_stmt,
4140 gsi);
4141 vargs.safe_push (gimple_assign_lhs (new_stmt));
4145 break;
4146 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4147 vargs.safe_push (op);
4148 break;
4149 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4150 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
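/* For a linear argument, the first copy creates a PHI that is advanced by
   LINEAR_STEP * NCOPIES * NUNITS each loop iteration; subsequent copies
   simply add J * NUNITS * LINEAR_STEP to the PHI result.  */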
4151 if (j == 0)
4153 gimple_seq stmts;
4154 arginfo[i].op
4155 = force_gimple_operand (arginfo[i].op, &stmts, true,
4156 NULL_TREE);
4157 if (stmts != NULL)
4159 basic_block new_bb;
4160 edge pe = loop_preheader_edge (loop);
4161 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4162 gcc_assert (!new_bb);
4164 if (arginfo[i].simd_lane_linear)
4166 vargs.safe_push (arginfo[i].op);
4167 break;
4169 tree phi_res = copy_ssa_name (op);
4170 gphi *new_phi = create_phi_node (phi_res, loop->header);
4171 loop_vinfo->add_stmt (new_phi);
4172 add_phi_arg (new_phi, arginfo[i].op,
4173 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4174 enum tree_code code
4175 = POINTER_TYPE_P (TREE_TYPE (op))
4176 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4177 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4178 ? sizetype : TREE_TYPE (op);
4179 widest_int cst
4180 = wi::mul (bestn->simdclone->args[i].linear_step,
4181 ncopies * nunits);
4182 tree tcst = wide_int_to_tree (type, cst);
4183 tree phi_arg = copy_ssa_name (op);
4184 gassign *new_stmt
4185 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4186 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4187 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4188 loop_vinfo->add_stmt (new_stmt);
4189 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4190 UNKNOWN_LOCATION);
4191 arginfo[i].op = phi_res;
4192 vargs.safe_push (phi_res);
4194 else
4196 enum tree_code code
4197 = POINTER_TYPE_P (TREE_TYPE (op))
4198 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4199 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4200 ? sizetype : TREE_TYPE (op);
4201 widest_int cst
4202 = wi::mul (bestn->simdclone->args[i].linear_step,
4203 j * nunits);
4204 tree tcst = wide_int_to_tree (type, cst);
4205 new_temp = make_ssa_name (TREE_TYPE (op));
4206 gassign *new_stmt
4207 = gimple_build_assign (new_temp, code,
4208 arginfo[i].op, tcst);
4209 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4210 vargs.safe_push (new_temp);
4212 break;
4213 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4214 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4215 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4216 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4217 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4218 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4219 default:
4220 gcc_unreachable ();
4224 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4225 if (vec_dest)
4227 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4228 if (ratype)
4229 new_temp = create_tmp_var (ratype);
4230 else if (simd_clone_subparts (vectype)
4231 == simd_clone_subparts (rtype))
4232 new_temp = make_ssa_name (vec_dest, new_call);
4233 else
4234 new_temp = make_ssa_name (rtype, new_call);
4235 gimple_call_set_lhs (new_call, new_temp);
4237 stmt_vec_info new_stmt_info
4238 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
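/* What follows distributes the clone's return value back into the loop's
   vectors: a return vector with more elements than VECTYPE is split with
   BIT_FIELD_REFs (or loaded piecewise from the returned array), one with
   fewer elements is accumulated into a CONSTRUCTOR across copies, and an
   array return of matching width is simply loaded through a MEM_REF.  */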
4240 if (vec_dest)
4242 if (simd_clone_subparts (vectype) < nunits)
4244 unsigned int k, l;
4245 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4246 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4247 k = nunits / simd_clone_subparts (vectype);
4248 gcc_assert ((k & (k - 1)) == 0);
4249 for (l = 0; l < k; l++)
4251 tree t;
4252 if (ratype)
4254 t = build_fold_addr_expr (new_temp);
4255 t = build2 (MEM_REF, vectype, t,
4256 build_int_cst (TREE_TYPE (t), l * bytes));
4258 else
4259 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4260 bitsize_int (prec), bitsize_int (l * prec));
4261 gimple *new_stmt
4262 = gimple_build_assign (make_ssa_name (vectype), t);
4263 new_stmt_info
4264 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4266 if (j == 0 && l == 0)
4267 STMT_VINFO_VEC_STMT (stmt_info)
4268 = *vec_stmt = new_stmt_info;
4269 else
4270 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4272 prev_stmt_info = new_stmt_info;
4275 if (ratype)
4276 vect_clobber_variable (stmt_info, gsi, new_temp);
4277 continue;
4279 else if (simd_clone_subparts (vectype) > nunits)
4281 unsigned int k = (simd_clone_subparts (vectype)
4282 / simd_clone_subparts (rtype));
4283 gcc_assert ((k & (k - 1)) == 0);
4284 if ((j & (k - 1)) == 0)
4285 vec_alloc (ret_ctor_elts, k);
4286 if (ratype)
4288 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4289 for (m = 0; m < o; m++)
4291 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4292 size_int (m), NULL_TREE, NULL_TREE);
4293 gimple *new_stmt
4294 = gimple_build_assign (make_ssa_name (rtype), tem);
4295 new_stmt_info
4296 = vect_finish_stmt_generation (stmt_info, new_stmt,
4297 gsi);
4298 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4299 gimple_assign_lhs (new_stmt));
4301 vect_clobber_variable (stmt_info, gsi, new_temp);
4303 else
4304 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4305 if ((j & (k - 1)) != k - 1)
4306 continue;
4307 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4308 gimple *new_stmt
4309 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4310 new_stmt_info
4311 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4313 if ((unsigned) j == k - 1)
4314 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4315 else
4316 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4318 prev_stmt_info = new_stmt_info;
4319 continue;
4321 else if (ratype)
4323 tree t = build_fold_addr_expr (new_temp);
4324 t = build2 (MEM_REF, vectype, t,
4325 build_int_cst (TREE_TYPE (t), 0));
4326 gimple *new_stmt
4327 = gimple_build_assign (make_ssa_name (vec_dest), t);
4328 new_stmt_info
4329 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4330 vect_clobber_variable (stmt_info, gsi, new_temp);
4334 if (j == 0)
4335 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4336 else
4337 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4339 prev_stmt_info = new_stmt_info;
4342 vargs.release ();
4344 /* The call in STMT might prevent it from being removed in DCE.
4345 We cannot, however, remove it here, due to the way the SSA name
4346 it defines is mapped to the new definition. So just replace the
4347 rhs of the statement with something harmless. */
4349 if (slp_node)
4350 return true;
4352 gimple *new_stmt;
4353 if (scalar_dest)
4355 type = TREE_TYPE (scalar_dest);
4356 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4357 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4359 else
4360 new_stmt = gimple_build_nop ();
4361 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4362 unlink_stmt_vdef (stmt);
4364 return true;
4368 /* Function vect_gen_widened_results_half
4370 Create a vector stmt whose code, number of operands, and result
4371 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4372 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4373 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4374 needs to be created (DECL is a function-decl of a target-builtin).
4375 STMT_INFO is the original scalar stmt that we are vectorizing. */
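/* For example (assuming the codes chosen by supportable_widening_operation),
   a WIDEN_MULT_EXPR is typically vectorized by calling this function twice,
   once with VEC_WIDEN_MULT_LO_EXPR and once with VEC_WIDEN_MULT_HI_EXPR,
   each producing half of the widened elements.  */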
4377 static gimple *
4378 vect_gen_widened_results_half (enum tree_code code,
4379 tree decl,
4380 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4381 tree vec_dest, gimple_stmt_iterator *gsi,
4382 stmt_vec_info stmt_info)
4384 gimple *new_stmt;
4385 tree new_temp;
4387 /* Generate half of the widened result: */
4388 if (code == CALL_EXPR)
4390 /* Target specific support */
4391 if (op_type == binary_op)
4392 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4393 else
4394 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4395 new_temp = make_ssa_name (vec_dest, new_stmt);
4396 gimple_call_set_lhs (new_stmt, new_temp);
4398 else
4400 /* Generic support */
4401 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4402 if (op_type != binary_op)
4403 vec_oprnd1 = NULL;
4404 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4405 new_temp = make_ssa_name (vec_dest, new_stmt);
4406 gimple_assign_set_lhs (new_stmt, new_temp);
4408 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4410 return new_stmt;
4414 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4415 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4416 containing scalar operand), and for the rest we get a copy with
4417 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4418 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4419 The vectors are collected into VEC_OPRNDS. */
4421 static void
4422 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4423 vec<tree> *vec_oprnds, int multi_step_cvt)
4425 vec_info *vinfo = stmt_info->vinfo;
4426 tree vec_oprnd;
4428 /* Get first vector operand. */
4429 /* All the vector operands except the very first one (that is the scalar oprnd)
4430 are stmt copies. */
4431 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4432 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4433 else
4434 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4436 vec_oprnds->quick_push (vec_oprnd);
4438 /* Get second vector operand. */
4439 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4440 vec_oprnds->quick_push (vec_oprnd);
4442 *oprnd = vec_oprnd;
4444 /* For conversion in multiple steps, continue to get operands
4445 recursively. */
4446 if (multi_step_cvt)
4447 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4448 multi_step_cvt - 1);
4452 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4453 For multi-step conversions store the resulting vectors and call the function
4454 recursively. */
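/* For instance (illustrative), demoting four V4SI vectors to a single V16QI
   vector takes two levels: pairs of V4SI are first packed to V8HI, and the
   recursive call then packs the two V8HI results into the final V16QI with
   VEC_PACK_TRUNC_EXPR.  */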
4456 static void
4457 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4458 int multi_step_cvt,
4459 stmt_vec_info stmt_info,
4460 vec<tree> vec_dsts,
4461 gimple_stmt_iterator *gsi,
4462 slp_tree slp_node, enum tree_code code,
4463 stmt_vec_info *prev_stmt_info)
4465 unsigned int i;
4466 tree vop0, vop1, new_tmp, vec_dest;
4468 vec_dest = vec_dsts.pop ();
4470 for (i = 0; i < vec_oprnds->length (); i += 2)
4472 /* Create demotion operation. */
4473 vop0 = (*vec_oprnds)[i];
4474 vop1 = (*vec_oprnds)[i + 1];
4475 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4476 new_tmp = make_ssa_name (vec_dest, new_stmt);
4477 gimple_assign_set_lhs (new_stmt, new_tmp);
4478 stmt_vec_info new_stmt_info
4479 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4481 if (multi_step_cvt)
4482 /* Store the resulting vector for next recursive call. */
4483 (*vec_oprnds)[i/2] = new_tmp;
4484 else
4486 /* This is the last step of the conversion sequence. Store the
4487 vectors in SLP_NODE or in vector info of the scalar statement
4488 (or in STMT_VINFO_RELATED_STMT chain). */
4489 if (slp_node)
4490 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4491 else
4493 if (!*prev_stmt_info)
4494 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4495 else
4496 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4498 *prev_stmt_info = new_stmt_info;
4503 /* For multi-step demotion operations we first generate demotion operations
4504 from the source type to the intermediate types, and then combine the
4505 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4506 type. */
4507 if (multi_step_cvt)
4509 /* At each level of recursion we have half of the operands we had at the
4510 previous level. */
4511 vec_oprnds->truncate ((i+1)/2);
4512 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4513 stmt_info, vec_dsts, gsi,
4514 slp_node, VEC_PACK_TRUNC_EXPR,
4515 prev_stmt_info);
4518 vec_dsts.quick_push (vec_dest);
4522 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4523 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4524 STMT_INFO. For multi-step conversions store the resulting vectors and
4525 call the function recursively. */
4527 static void
4528 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4529 vec<tree> *vec_oprnds1,
4530 stmt_vec_info stmt_info, tree vec_dest,
4531 gimple_stmt_iterator *gsi,
4532 enum tree_code code1,
4533 enum tree_code code2, tree decl1,
4534 tree decl2, int op_type)
4536 int i;
4537 tree vop0, vop1, new_tmp1, new_tmp2;
4538 gimple *new_stmt1, *new_stmt2;
4539 vec<tree> vec_tmp = vNULL;
4541 vec_tmp.create (vec_oprnds0->length () * 2);
4542 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4544 if (op_type == binary_op)
4545 vop1 = (*vec_oprnds1)[i];
4546 else
4547 vop1 = NULL_TREE;
4549 /* Generate the two halves of promotion operation. */
4550 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4551 op_type, vec_dest, gsi,
4552 stmt_info);
4553 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4554 op_type, vec_dest, gsi,
4555 stmt_info);
4556 if (is_gimple_call (new_stmt1))
4558 new_tmp1 = gimple_call_lhs (new_stmt1);
4559 new_tmp2 = gimple_call_lhs (new_stmt2);
4561 else
4563 new_tmp1 = gimple_assign_lhs (new_stmt1);
4564 new_tmp2 = gimple_assign_lhs (new_stmt2);
4567 /* Store the results for the next step. */
4568 vec_tmp.quick_push (new_tmp1);
4569 vec_tmp.quick_push (new_tmp2);
4572 vec_oprnds0->release ();
4573 *vec_oprnds0 = vec_tmp;
4577 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4578 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4579 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4580 Return true if STMT_INFO is vectorizable in this way. */
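/* Conversions below are classified by how the element count changes between
   the input and output vector types: NONE (same count, e.g. int <-> float of
   equal width), WIDEN (the output elements are wider, so fewer fit per
   vector, e.g. short -> int, possibly via intermediate types), and NARROW
   (the output elements are narrower, e.g. int -> short).  */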
4582 static bool
4583 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4584 stmt_vec_info *vec_stmt, slp_tree slp_node,
4585 stmt_vector_for_cost *cost_vec)
4587 tree vec_dest;
4588 tree scalar_dest;
4589 tree op0, op1 = NULL_TREE;
4590 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4591 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4592 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4593 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4594 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4595 tree new_temp;
4596 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4597 int ndts = 2;
4598 stmt_vec_info prev_stmt_info;
4599 poly_uint64 nunits_in;
4600 poly_uint64 nunits_out;
4601 tree vectype_out, vectype_in;
4602 int ncopies, i, j;
4603 tree lhs_type, rhs_type;
4604 enum { NARROW, NONE, WIDEN } modifier;
4605 vec<tree> vec_oprnds0 = vNULL;
4606 vec<tree> vec_oprnds1 = vNULL;
4607 tree vop0;
4608 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4609 vec_info *vinfo = stmt_info->vinfo;
4610 int multi_step_cvt = 0;
4611 vec<tree> interm_types = vNULL;
4612 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4613 int op_type;
4614 unsigned short fltsz;
4616 /* Is STMT a vectorizable conversion? */
4618 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4619 return false;
4621 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4622 && ! vec_stmt)
4623 return false;
4625 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4626 if (!stmt)
4627 return false;
4629 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4630 return false;
4632 code = gimple_assign_rhs_code (stmt);
4633 if (!CONVERT_EXPR_CODE_P (code)
4634 && code != FIX_TRUNC_EXPR
4635 && code != FLOAT_EXPR
4636 && code != WIDEN_MULT_EXPR
4637 && code != WIDEN_LSHIFT_EXPR)
4638 return false;
4640 op_type = TREE_CODE_LENGTH (code);
4642 /* Check types of lhs and rhs. */
4643 scalar_dest = gimple_assign_lhs (stmt);
4644 lhs_type = TREE_TYPE (scalar_dest);
4645 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4647 op0 = gimple_assign_rhs1 (stmt);
4648 rhs_type = TREE_TYPE (op0);
4650 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4651 && !((INTEGRAL_TYPE_P (lhs_type)
4652 && INTEGRAL_TYPE_P (rhs_type))
4653 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4654 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4655 return false;
4657 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4658 && ((INTEGRAL_TYPE_P (lhs_type)
4659 && !type_has_mode_precision_p (lhs_type))
4660 || (INTEGRAL_TYPE_P (rhs_type)
4661 && !type_has_mode_precision_p (rhs_type))))
4663 if (dump_enabled_p ())
4664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4665 "type conversion to/from bit-precision unsupported."
4666 "\n");
4667 return false;
4670 /* Check the operands of the operation. */
4671 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4673 if (dump_enabled_p ())
4674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4675 "use not simple.\n");
4676 return false;
4678 if (op_type == binary_op)
4680 bool ok;
4682 op1 = gimple_assign_rhs2 (stmt);
4683 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4684 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4685 OP1. */
4686 if (CONSTANT_CLASS_P (op0))
4687 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4688 else
4689 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4691 if (!ok)
4693 if (dump_enabled_p ())
4694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4695 "use not simple.\n");
4696 return false;
4700 /* If op0 is an external or constant def, use a vector type of
4701 the same size as the output vector type. */
4702 if (!vectype_in)
4703 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4704 if (vec_stmt)
4705 gcc_assert (vectype_in);
4706 if (!vectype_in)
4708 if (dump_enabled_p ())
4709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4710 "no vectype for scalar type %T\n", rhs_type);
4712 return false;
4715 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4716 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4718 if (dump_enabled_p ())
4719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4720 "can't convert between boolean and non "
4721 "boolean vectors %T\n", rhs_type);
4723 return false;
4726 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4727 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4728 if (known_eq (nunits_out, nunits_in))
4729 modifier = NONE;
4730 else if (multiple_p (nunits_out, nunits_in))
4731 modifier = NARROW;
4732 else
4734 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4735 modifier = WIDEN;
4738 /* Multiple types in SLP are handled by creating the appropriate number of
4739 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4740 case of SLP. */
4741 if (slp_node)
4742 ncopies = 1;
4743 else if (modifier == NARROW)
4744 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4745 else
4746 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4748 /* Sanity check: make sure that at least one copy of the vectorized stmt
4749 needs to be generated. */
4750 gcc_assert (ncopies >= 1);
4752 bool found_mode = false;
4753 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4754 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4755 opt_scalar_mode rhs_mode_iter;
4757 /* Supportable by target? */
4758 switch (modifier)
4760 case NONE:
4761 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4762 return false;
4763 if (supportable_convert_operation (code, vectype_out, vectype_in,
4764 &decl1, &code1))
4765 break;
4766 /* FALLTHRU */
4767 unsupported:
4768 if (dump_enabled_p ())
4769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4770 "conversion not supported by target.\n");
4771 return false;
4773 case WIDEN:
4774 if (supportable_widening_operation (code, stmt_info, vectype_out,
4775 vectype_in, &code1, &code2,
4776 &multi_step_cvt, &interm_types))
4778 /* Binary widening operation can only be supported directly by the
4779 architecture. */
4780 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4781 break;
4784 if (code != FLOAT_EXPR
4785 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4786 goto unsupported;
4788 fltsz = GET_MODE_SIZE (lhs_mode);
4789 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4791 rhs_mode = rhs_mode_iter.require ();
4792 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4793 break;
4795 cvt_type
4796 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4797 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4798 if (cvt_type == NULL_TREE)
4799 goto unsupported;
4801 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4803 if (!supportable_convert_operation (code, vectype_out,
4804 cvt_type, &decl1, &codecvt1))
4805 goto unsupported;
4807 else if (!supportable_widening_operation (code, stmt_info,
4808 vectype_out, cvt_type,
4809 &codecvt1, &codecvt2,
4810 &multi_step_cvt,
4811 &interm_types))
4812 continue;
4813 else
4814 gcc_assert (multi_step_cvt == 0);
4816 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4817 vectype_in, &code1, &code2,
4818 &multi_step_cvt, &interm_types))
4820 found_mode = true;
4821 break;
4825 if (!found_mode)
4826 goto unsupported;
4828 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4829 codecvt2 = ERROR_MARK;
4830 else
4832 multi_step_cvt++;
4833 interm_types.safe_push (cvt_type);
4834 cvt_type = NULL_TREE;
4836 break;
4838 case NARROW:
4839 gcc_assert (op_type == unary_op);
4840 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4841 &code1, &multi_step_cvt,
4842 &interm_types))
4843 break;
4845 if (code != FIX_TRUNC_EXPR
4846 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4847 goto unsupported;
4849 cvt_type
4850 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4851 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4852 if (cvt_type == NULL_TREE)
4853 goto unsupported;
4854 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4855 &decl1, &codecvt1))
4856 goto unsupported;
4857 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4858 &code1, &multi_step_cvt,
4859 &interm_types))
4860 break;
4861 goto unsupported;
4863 default:
4864 gcc_unreachable ();
4867 if (!vec_stmt) /* transformation not required. */
4869 DUMP_VECT_SCOPE ("vectorizable_conversion");
4870 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4872 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4873 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4874 cost_vec);
4876 else if (modifier == NARROW)
4878 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4879 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4880 cost_vec);
4882 else
4884 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4885 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4886 cost_vec);
4888 interm_types.release ();
4889 return true;
4892 /* Transform. */
4893 if (dump_enabled_p ())
4894 dump_printf_loc (MSG_NOTE, vect_location,
4895 "transform conversion. ncopies = %d.\n", ncopies);
4897 if (op_type == binary_op)
4899 if (CONSTANT_CLASS_P (op0))
4900 op0 = fold_convert (TREE_TYPE (op1), op0);
4901 else if (CONSTANT_CLASS_P (op1))
4902 op1 = fold_convert (TREE_TYPE (op0), op1);
4905 /* In the case of multi-step conversion, we first generate conversion operations
4906 to the intermediate types, and then from those types to the final one.
4907 We create vector destinations for the intermediate type (TYPES) received
4908 from supportable_*_operation, and store them in the correct order
4909 for future use in vect_create_vectorized_*_stmts (). */
4910 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4911 vec_dest = vect_create_destination_var (scalar_dest,
4912 (cvt_type && modifier == WIDEN)
4913 ? cvt_type : vectype_out);
4914 vec_dsts.quick_push (vec_dest);
4916 if (multi_step_cvt)
4918 for (i = interm_types.length () - 1;
4919 interm_types.iterate (i, &intermediate_type); i--)
4921 vec_dest = vect_create_destination_var (scalar_dest,
4922 intermediate_type);
4923 vec_dsts.quick_push (vec_dest);
4927 if (cvt_type)
4928 vec_dest = vect_create_destination_var (scalar_dest,
4929 modifier == WIDEN
4930 ? vectype_out : cvt_type);
4932 if (!slp_node)
4934 if (modifier == WIDEN)
4936 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4937 if (op_type == binary_op)
4938 vec_oprnds1.create (1);
4940 else if (modifier == NARROW)
4941 vec_oprnds0.create (
4942 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4944 else if (code == WIDEN_LSHIFT_EXPR)
4945 vec_oprnds1.create (slp_node->vec_stmts_size);
4947 last_oprnd = op0;
4948 prev_stmt_info = NULL;
4949 switch (modifier)
4951 case NONE:
4952 for (j = 0; j < ncopies; j++)
4954 if (j == 0)
4955 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
4956 NULL, slp_node);
4957 else
4958 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
4960 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4962 stmt_vec_info new_stmt_info;
4963 /* Arguments are ready. Create the new vector stmt. */
4964 if (code1 == CALL_EXPR)
4966 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4967 new_temp = make_ssa_name (vec_dest, new_stmt);
4968 gimple_call_set_lhs (new_stmt, new_temp);
4969 new_stmt_info
4970 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4972 else
4974 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4975 gassign *new_stmt
4976 = gimple_build_assign (vec_dest, code1, vop0);
4977 new_temp = make_ssa_name (vec_dest, new_stmt);
4978 gimple_assign_set_lhs (new_stmt, new_temp);
4979 new_stmt_info
4980 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4983 if (slp_node)
4984 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4985 else
4987 if (!prev_stmt_info)
4988 STMT_VINFO_VEC_STMT (stmt_info)
4989 = *vec_stmt = new_stmt_info;
4990 else
4991 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4992 prev_stmt_info = new_stmt_info;
4996 break;
4998 case WIDEN:
4999 /* In case the vectorization factor (VF) is bigger than the number
5000 of elements that we can fit in a vectype (nunits), we have to
5001 generate more than one vector stmt - i.e., we need to "unroll"
5002 the vector stmt by a factor VF/nunits. */
5003 for (j = 0; j < ncopies; j++)
5005 /* Handle uses. */
5006 if (j == 0)
5008 if (slp_node)
5010 if (code == WIDEN_LSHIFT_EXPR)
5012 unsigned int k;
5014 vec_oprnd1 = op1;
5015 /* Store vec_oprnd1 for every vector stmt to be created
5016 for SLP_NODE. We check during the analysis that all
5017 the shift arguments are the same. */
5018 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5019 vec_oprnds1.quick_push (vec_oprnd1);
5021 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5022 &vec_oprnds0, NULL, slp_node);
5024 else
5025 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5026 &vec_oprnds1, slp_node);
5028 else
5030 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5031 vec_oprnds0.quick_push (vec_oprnd0);
5032 if (op_type == binary_op)
5034 if (code == WIDEN_LSHIFT_EXPR)
5035 vec_oprnd1 = op1;
5036 else
5037 vec_oprnd1
5038 = vect_get_vec_def_for_operand (op1, stmt_info);
5039 vec_oprnds1.quick_push (vec_oprnd1);
5043 else
5045 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5046 vec_oprnds0.truncate (0);
5047 vec_oprnds0.quick_push (vec_oprnd0);
5048 if (op_type == binary_op)
5050 if (code == WIDEN_LSHIFT_EXPR)
5051 vec_oprnd1 = op1;
5052 else
5053 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5054 vec_oprnd1);
5055 vec_oprnds1.truncate (0);
5056 vec_oprnds1.quick_push (vec_oprnd1);
5060 /* Arguments are ready. Create the new vector stmts. */
5061 for (i = multi_step_cvt; i >= 0; i--)
5063 tree this_dest = vec_dsts[i];
5064 enum tree_code c1 = code1, c2 = code2;
5065 if (i == 0 && codecvt2 != ERROR_MARK)
5067 c1 = codecvt1;
5068 c2 = codecvt2;
5070 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5071 &vec_oprnds1, stmt_info,
5072 this_dest, gsi,
5073 c1, c2, decl1, decl2,
5074 op_type);
5077 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5079 stmt_vec_info new_stmt_info;
5080 if (cvt_type)
5082 if (codecvt1 == CALL_EXPR)
5084 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5085 new_temp = make_ssa_name (vec_dest, new_stmt);
5086 gimple_call_set_lhs (new_stmt, new_temp);
5087 new_stmt_info
5088 = vect_finish_stmt_generation (stmt_info, new_stmt,
5089 gsi);
5091 else
5093 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5094 new_temp = make_ssa_name (vec_dest);
5095 gassign *new_stmt
5096 = gimple_build_assign (new_temp, codecvt1, vop0);
5097 new_stmt_info
5098 = vect_finish_stmt_generation (stmt_info, new_stmt,
5099 gsi);
5102 else
5103 new_stmt_info = vinfo->lookup_def (vop0);
5105 if (slp_node)
5106 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5107 else
5109 if (!prev_stmt_info)
5110 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5111 else
5112 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5113 prev_stmt_info = new_stmt_info;
5118 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5119 break;
5121 case NARROW:
5122 /* In case the vectorization factor (VF) is bigger than the number
5123 of elements that we can fit in a vectype (nunits), we have to
5124 generate more than one vector stmt - i.e., we need to "unroll"
5125 the vector stmt by a factor VF/nunits. */
5126 for (j = 0; j < ncopies; j++)
5128 /* Handle uses. */
5129 if (slp_node)
5130 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5131 slp_node);
5132 else
5134 vec_oprnds0.truncate (0);
5135 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5136 vect_pow2 (multi_step_cvt) - 1);
5139 /* Arguments are ready. Create the new vector stmts. */
5140 if (cvt_type)
5141 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5143 if (codecvt1 == CALL_EXPR)
5145 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5146 new_temp = make_ssa_name (vec_dest, new_stmt);
5147 gimple_call_set_lhs (new_stmt, new_temp);
5148 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5150 else
5152 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5153 new_temp = make_ssa_name (vec_dest);
5154 gassign *new_stmt
5155 = gimple_build_assign (new_temp, codecvt1, vop0);
5156 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5159 vec_oprnds0[i] = new_temp;
5162 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5163 stmt_info, vec_dsts, gsi,
5164 slp_node, code1,
5165 &prev_stmt_info);
5168 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5169 break;
5172 vec_oprnds0.release ();
5173 vec_oprnds1.release ();
5174 interm_types.release ();
5176 return true;
5180 /* Function vectorizable_assignment.
5182 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5183 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5184 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5185 Return true if STMT_INFO is vectorizable in this way. */
5187 static bool
5188 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5189 stmt_vec_info *vec_stmt, slp_tree slp_node,
5190 stmt_vector_for_cost *cost_vec)
5192 tree vec_dest;
5193 tree scalar_dest;
5194 tree op;
5195 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5196 tree new_temp;
5197 enum vect_def_type dt[1] = {vect_unknown_def_type};
5198 int ndts = 1;
5199 int ncopies;
5200 int i, j;
5201 vec<tree> vec_oprnds = vNULL;
5202 tree vop;
5203 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5204 vec_info *vinfo = stmt_info->vinfo;
5205 stmt_vec_info prev_stmt_info = NULL;
5206 enum tree_code code;
5207 tree vectype_in;
5209 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5210 return false;
5212 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5213 && ! vec_stmt)
5214 return false;
5216 /* Is vectorizable assignment? */
5217 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5218 if (!stmt)
5219 return false;
5221 scalar_dest = gimple_assign_lhs (stmt);
5222 if (TREE_CODE (scalar_dest) != SSA_NAME)
5223 return false;
5225 code = gimple_assign_rhs_code (stmt);
5226 if (gimple_assign_single_p (stmt)
5227 || code == PAREN_EXPR
5228 || CONVERT_EXPR_CODE_P (code))
5229 op = gimple_assign_rhs1 (stmt);
5230 else
5231 return false;
5233 if (code == VIEW_CONVERT_EXPR)
5234 op = TREE_OPERAND (op, 0);
5236 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5237 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5239 /* Multiple types in SLP are handled by creating the appropriate number of
5240 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5241 case of SLP. */
5242 if (slp_node)
5243 ncopies = 1;
5244 else
5245 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5247 gcc_assert (ncopies >= 1);
5249 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5251 if (dump_enabled_p ())
5252 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5253 "use not simple.\n");
5254 return false;
5257 /* We can handle NOP_EXPR conversions that do not change the number
5258 of elements or the vector size. */
5259 if ((CONVERT_EXPR_CODE_P (code)
5260 || code == VIEW_CONVERT_EXPR)
5261 && (!vectype_in
5262 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5263 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5264 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5265 return false;
5267 /* We do not handle bit-precision changes. */
5268 if ((CONVERT_EXPR_CODE_P (code)
5269 || code == VIEW_CONVERT_EXPR)
5270 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5271 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5272 || !type_has_mode_precision_p (TREE_TYPE (op)))
5273 /* But a conversion that does not change the bit-pattern is ok. */
5274 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5275 > TYPE_PRECISION (TREE_TYPE (op)))
5276 && TYPE_UNSIGNED (TREE_TYPE (op)))
5277 /* Conversion between boolean types of different sizes is
5278 a simple assignment in case their vectypes are the same
5279 boolean vectors. */
5280 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5281 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5283 if (dump_enabled_p ())
5284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5285 "type conversion to/from bit-precision "
5286 "unsupported.\n");
5287 return false;
5290 if (!vec_stmt) /* transformation not required. */
5292 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5293 DUMP_VECT_SCOPE ("vectorizable_assignment");
5294 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5295 return true;
5298 /* Transform. */
5299 if (dump_enabled_p ())
5300 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5302 /* Handle def. */
5303 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5305 /* Handle use. */
5306 for (j = 0; j < ncopies; j++)
5308 /* Handle uses. */
5309 if (j == 0)
5310 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5311 else
5312 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5314 /* Arguments are ready. Create the new vector stmt. */
5315 stmt_vec_info new_stmt_info = NULL;
5316 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5318 if (CONVERT_EXPR_CODE_P (code)
5319 || code == VIEW_CONVERT_EXPR)
5320 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5321 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5322 new_temp = make_ssa_name (vec_dest, new_stmt);
5323 gimple_assign_set_lhs (new_stmt, new_temp);
5324 new_stmt_info
5325 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5326 if (slp_node)
5327 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5330 if (slp_node)
5331 continue;
5333 if (j == 0)
5334 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5335 else
5336 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5338 prev_stmt_info = new_stmt_info;
5341 vec_oprnds.release ();
5342 return true;
5346 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5347 either as shift by a scalar or by a vector. */
5349 bool
5350 vect_supportable_shift (enum tree_code code, tree scalar_type)
5353 machine_mode vec_mode;
5354 optab optab;
5355 int icode;
5356 tree vectype;
5358 vectype = get_vectype_for_scalar_type (scalar_type);
5359 if (!vectype)
5360 return false;
5362 optab = optab_for_tree_code (code, vectype, optab_scalar);
5363 if (!optab
5364 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5366 optab = optab_for_tree_code (code, vectype, optab_vector);
5367 if (!optab
5368 || (optab_handler (optab, TYPE_MODE (vectype))
5369 == CODE_FOR_nothing))
5370 return false;
5373 vec_mode = TYPE_MODE (vectype);
5374 icode = (int) optab_handler (optab, vec_mode);
5375 if (icode == CODE_FOR_nothing)
5376 return false;
5378 return true;
5382 /* Function vectorizable_shift.
5384 Check if STMT_INFO performs a shift operation that can be vectorized.
5385 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5386 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5387 Return true if STMT_INFO is vectorizable in this way. */
5389 bool
5390 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5391 stmt_vec_info *vec_stmt, slp_tree slp_node,
5392 stmt_vector_for_cost *cost_vec)
5394 tree vec_dest;
5395 tree scalar_dest;
5396 tree op0, op1 = NULL;
5397 tree vec_oprnd1 = NULL_TREE;
5398 tree vectype;
5399 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5400 enum tree_code code;
5401 machine_mode vec_mode;
5402 tree new_temp;
5403 optab optab;
5404 int icode;
5405 machine_mode optab_op2_mode;
5406 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5407 int ndts = 2;
5408 stmt_vec_info prev_stmt_info;
5409 poly_uint64 nunits_in;
5410 poly_uint64 nunits_out;
5411 tree vectype_out;
5412 tree op1_vectype;
5413 int ncopies;
5414 int j, i;
5415 vec<tree> vec_oprnds0 = vNULL;
5416 vec<tree> vec_oprnds1 = vNULL;
5417 tree vop0, vop1;
5418 unsigned int k;
5419 bool scalar_shift_arg = true;
5420 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5421 vec_info *vinfo = stmt_info->vinfo;
5423 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5424 return false;
5426 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5427 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5428 && ! vec_stmt)
5429 return false;
5431 /* Is STMT a vectorizable binary/unary operation? */
5432 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5433 if (!stmt)
5434 return false;
5436 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5437 return false;
5439 code = gimple_assign_rhs_code (stmt);
5441 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5442 || code == RROTATE_EXPR))
5443 return false;
5445 scalar_dest = gimple_assign_lhs (stmt);
5446 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5447 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5449 if (dump_enabled_p ())
5450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5451 "bit-precision shifts not supported.\n");
5452 return false;
5455 op0 = gimple_assign_rhs1 (stmt);
5456 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5458 if (dump_enabled_p ())
5459 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5460 "use not simple.\n");
5461 return false;
5463 /* If op0 is an external or constant def use a vector type with
5464 the same size as the output vector type. */
5465 if (!vectype)
5466 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5467 if (vec_stmt)
5468 gcc_assert (vectype);
5469 if (!vectype)
5471 if (dump_enabled_p ())
5472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5473 "no vectype for scalar type\n");
5474 return false;
5477 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5478 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5479 if (maybe_ne (nunits_out, nunits_in))
5480 return false;
5482 op1 = gimple_assign_rhs2 (stmt);
5483 stmt_vec_info op1_def_stmt_info;
5484 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5485 &op1_def_stmt_info))
5487 if (dump_enabled_p ())
5488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5489 "use not simple.\n");
5490 return false;
5493 /* Multiple types in SLP are handled by creating the appropriate number of
5494 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5495 case of SLP. */
5496 if (slp_node)
5497 ncopies = 1;
5498 else
5499 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5501 gcc_assert (ncopies >= 1);
5503 /* Determine whether the shift amount is a vector or a scalar. If the
5504 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5506 if ((dt[1] == vect_internal_def
5507 || dt[1] == vect_induction_def
5508 || dt[1] == vect_nested_cycle)
5509 && !slp_node)
5510 scalar_shift_arg = false;
5511 else if (dt[1] == vect_constant_def
5512 || dt[1] == vect_external_def
5513 || dt[1] == vect_internal_def)
5515 /* In SLP, we need to check whether the shift count is the same
5516 for all statements; in loops, if it is a constant or invariant,
5517 it is always a scalar shift. */
5518 if (slp_node)
5520 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5521 stmt_vec_info slpstmt_info;
5523 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5525 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5526 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5527 scalar_shift_arg = false;
5531 /* If the shift amount is computed by a pattern stmt we cannot
5532 use the scalar amount directly thus give up and use a vector
5533 shift. */
5534 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5535 scalar_shift_arg = false;
5537 else
5539 if (dump_enabled_p ())
5540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5541 "operand mode requires invariant argument.\n");
5542 return false;
5545 /* Vector shifted by vector. */
5546 if (!scalar_shift_arg)
5548 optab = optab_for_tree_code (code, vectype, optab_vector);
5549 if (dump_enabled_p ())
5550 dump_printf_loc (MSG_NOTE, vect_location,
5551 "vector/vector shift/rotate found.\n");
5553 if (!op1_vectype)
5554 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5555 if (op1_vectype == NULL_TREE
5556 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5558 if (dump_enabled_p ())
5559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5560 "unusable type for last operand in"
5561 " vector/vector shift/rotate.\n");
5562 return false;
5565 /* See if the machine has a vector shifted by scalar insn and if not
5566 then see if it has a vector shifted by vector insn. */
5567 else
5569 optab = optab_for_tree_code (code, vectype, optab_scalar);
5570 if (optab
5571 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5573 if (dump_enabled_p ())
5574 dump_printf_loc (MSG_NOTE, vect_location,
5575 "vector/scalar shift/rotate found.\n");
5577 else
5579 optab = optab_for_tree_code (code, vectype, optab_vector);
5580 if (optab
5581 && (optab_handler (optab, TYPE_MODE (vectype))
5582 != CODE_FOR_nothing))
5584 scalar_shift_arg = false;
5586 if (dump_enabled_p ())
5587 dump_printf_loc (MSG_NOTE, vect_location,
5588 "vector/vector shift/rotate found.\n");
5590 /* Unlike the other binary operators, shifts/rotates have
5591 an rhs of type int rather than of the same type as the lhs,
5592 so make sure the scalar is the right type if we are
5593 dealing with vectors of long long/long/short/char. */
5594 if (dt[1] == vect_constant_def)
5595 op1 = fold_convert (TREE_TYPE (vectype), op1);
5596 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5597 TREE_TYPE (op1)))
5599 if (slp_node
5600 && TYPE_MODE (TREE_TYPE (vectype))
5601 != TYPE_MODE (TREE_TYPE (op1)))
5603 if (dump_enabled_p ())
5604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5605 "unusable type for last operand in"
5606 " vector/vector shift/rotate.\n");
5607 return false;
5609 if (vec_stmt && !slp_node)
5611 op1 = fold_convert (TREE_TYPE (vectype), op1);
5612 op1 = vect_init_vector (stmt_info, op1,
5613 TREE_TYPE (vectype), NULL);
5620 /* Supportable by target? */
5621 if (!optab)
5623 if (dump_enabled_p ())
5624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5625 "no optab.\n");
5626 return false;
5628 vec_mode = TYPE_MODE (vectype);
5629 icode = (int) optab_handler (optab, vec_mode);
5630 if (icode == CODE_FOR_nothing)
5632 if (dump_enabled_p ())
5633 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5634 "op not supported by target.\n");
5635 /* Check only during analysis. */
5636 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5637 || (!vec_stmt
5638 && !vect_worthwhile_without_simd_p (vinfo, code)))
5639 return false;
5640 if (dump_enabled_p ())
5641 dump_printf_loc (MSG_NOTE, vect_location,
5642 "proceeding using word mode.\n");
5645 /* Worthwhile without SIMD support? Check only during analysis. */
5646 if (!vec_stmt
5647 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5648 && !vect_worthwhile_without_simd_p (vinfo, code))
5650 if (dump_enabled_p ())
5651 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5652 "not worthwhile without SIMD support.\n");
5653 return false;
5656 if (!vec_stmt) /* transformation not required. */
5658 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5659 DUMP_VECT_SCOPE ("vectorizable_shift");
5660 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5661 return true;
5664 /* Transform. */
5666 if (dump_enabled_p ())
5667 dump_printf_loc (MSG_NOTE, vect_location,
5668 "transform binary/unary operation.\n");
5670 /* Handle def. */
5671 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5673 prev_stmt_info = NULL;
5674 for (j = 0; j < ncopies; j++)
5676 /* Handle uses. */
5677 if (j == 0)
5679 if (scalar_shift_arg)
5681 /* Vector shl and shr insn patterns can be defined with scalar
5682 operand 2 (shift operand). In this case, use constant or loop
5683 invariant op1 directly, without extending it to vector mode
5684 first. */
5685 optab_op2_mode = insn_data[icode].operand[2].mode;
5686 if (!VECTOR_MODE_P (optab_op2_mode))
5688 if (dump_enabled_p ())
5689 dump_printf_loc (MSG_NOTE, vect_location,
5690 "operand 1 using scalar mode.\n");
5691 vec_oprnd1 = op1;
5692 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5693 vec_oprnds1.quick_push (vec_oprnd1);
5694 if (slp_node)
5696 /* Store vec_oprnd1 for every vector stmt to be created
5697 for SLP_NODE. We check during the analysis that all
5698 the shift arguments are the same.
5699 TODO: Allow different constants for different vector
5700 stmts generated for an SLP instance. */
5701 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5702 vec_oprnds1.quick_push (vec_oprnd1);
5707 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5708 (a special case for certain kinds of vector shifts); otherwise,
5709 operand 1 should be of a vector type (the usual case). */
5710 if (vec_oprnd1)
5711 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5712 slp_node);
5713 else
5714 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5715 slp_node);
5717 else
5718 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5720 /* Arguments are ready. Create the new vector stmt. */
5721 stmt_vec_info new_stmt_info = NULL;
5722 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5724 vop1 = vec_oprnds1[i];
5725 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5726 new_temp = make_ssa_name (vec_dest, new_stmt);
5727 gimple_assign_set_lhs (new_stmt, new_temp);
5728 new_stmt_info
5729 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5730 if (slp_node)
5731 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5734 if (slp_node)
5735 continue;
5737 if (j == 0)
5738 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5739 else
5740 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5741 prev_stmt_info = new_stmt_info;
5744 vec_oprnds0.release ();
5745 vec_oprnds1.release ();
5747 return true;
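/* Editor's note, an illustrative example that is not part of the original
   file: for a loop body such as

       x[i] = y[i] >> 2;

   the analysis above classifies the shift amount as scalar, so every
   generated vector statement keeps the literal amount, schematically

       vect_x = vect_y >> 2;

   whereas an amount that is defined inside the loop (dt[1] equal to
   vect_internal_def and no SLP) forces the vector/vector form, with the
   amount vectorized first:

       vect_x = vect_y >> vect_amount;  */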
5751 /* Function vectorizable_operation.
5753 Check if STMT_INFO performs a binary, unary or ternary operation that can
5754 be vectorized.
5755 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5756 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5757 Return true if STMT_INFO is vectorizable in this way. */
5759 static bool
5760 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5761 stmt_vec_info *vec_stmt, slp_tree slp_node,
5762 stmt_vector_for_cost *cost_vec)
5764 tree vec_dest;
5765 tree scalar_dest;
5766 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5767 tree vectype;
5768 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5769 enum tree_code code, orig_code;
5770 machine_mode vec_mode;
5771 tree new_temp;
5772 int op_type;
5773 optab optab;
5774 bool target_support_p;
5775 enum vect_def_type dt[3]
5776 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5777 int ndts = 3;
5778 stmt_vec_info prev_stmt_info;
5779 poly_uint64 nunits_in;
5780 poly_uint64 nunits_out;
5781 tree vectype_out;
5782 int ncopies;
5783 int j, i;
5784 vec<tree> vec_oprnds0 = vNULL;
5785 vec<tree> vec_oprnds1 = vNULL;
5786 vec<tree> vec_oprnds2 = vNULL;
5787 tree vop0, vop1, vop2;
5788 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5789 vec_info *vinfo = stmt_info->vinfo;
5791 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5792 return false;
5794 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5795 && ! vec_stmt)
5796 return false;
5798 /* Is STMT a vectorizable binary/unary operation? */
5799 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5800 if (!stmt)
5801 return false;
5803 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5804 return false;
5806 orig_code = code = gimple_assign_rhs_code (stmt);
5808 /* For pointer addition and subtraction, we should use the normal
5809 plus and minus for the vector operation. */
5810 if (code == POINTER_PLUS_EXPR)
5811 code = PLUS_EXPR;
5812 if (code == POINTER_DIFF_EXPR)
5813 code = MINUS_EXPR;
5815 /* Support only unary or binary operations. */
5816 op_type = TREE_CODE_LENGTH (code);
5817 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5819 if (dump_enabled_p ())
5820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5821 "num. args = %d (not unary/binary/ternary op).\n",
5822 op_type);
5823 return false;
5826 scalar_dest = gimple_assign_lhs (stmt);
5827 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5829 /* Most operations cannot handle bit-precision types without extra
5830 truncations. */
5831 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5832 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5833 /* Exceptions are bitwise binary operations. */
5834 && code != BIT_IOR_EXPR
5835 && code != BIT_XOR_EXPR
5836 && code != BIT_AND_EXPR)
5838 if (dump_enabled_p ())
5839 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5840 "bit-precision arithmetic not supported.\n");
5841 return false;
5844 op0 = gimple_assign_rhs1 (stmt);
5845 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5847 if (dump_enabled_p ())
5848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5849 "use not simple.\n");
5850 return false;
5852 /* If op0 is an external or constant def use a vector type with
5853 the same size as the output vector type. */
5854 if (!vectype)
5856 /* For a boolean type we cannot determine the vectype from
5857 an invariant value (we don't know whether it is a vector
5858 of booleans or a vector of integers). We use the output
5859 vectype because operations on booleans don't change the
5860 type. */
5861 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5863 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5865 if (dump_enabled_p ())
5866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5867 "not supported operation on bool value.\n");
5868 return false;
5870 vectype = vectype_out;
5872 else
5873 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5875 if (vec_stmt)
5876 gcc_assert (vectype);
5877 if (!vectype)
5879 if (dump_enabled_p ())
5880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5881 "no vectype for scalar type %T\n",
5882 TREE_TYPE (op0));
5884 return false;
5887 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5888 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5889 if (maybe_ne (nunits_out, nunits_in))
5890 return false;
5892 if (op_type == binary_op || op_type == ternary_op)
5894 op1 = gimple_assign_rhs2 (stmt);
5895 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
5897 if (dump_enabled_p ())
5898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5899 "use not simple.\n");
5900 return false;
5903 if (op_type == ternary_op)
5905 op2 = gimple_assign_rhs3 (stmt);
5906 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
5908 if (dump_enabled_p ())
5909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5910 "use not simple.\n");
5911 return false;
5915 /* Multiple types in SLP are handled by creating the appropriate number of
5916 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5917 case of SLP. */
5918 if (slp_node)
5919 ncopies = 1;
5920 else
5921 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5923 gcc_assert (ncopies >= 1);
5925 /* Shifts are handled in vectorizable_shift (). */
5926 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5927 || code == RROTATE_EXPR)
5928 return false;
5930 /* Supportable by target? */
5932 vec_mode = TYPE_MODE (vectype);
5933 if (code == MULT_HIGHPART_EXPR)
5934 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5935 else
5937 optab = optab_for_tree_code (code, vectype, optab_default);
5938 if (!optab)
5940 if (dump_enabled_p ())
5941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5942 "no optab.\n");
5943 return false;
5945 target_support_p = (optab_handler (optab, vec_mode)
5946 != CODE_FOR_nothing);
5949 if (!target_support_p)
5951 if (dump_enabled_p ())
5952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5953 "op not supported by target.\n");
5954 /* Check only during analysis. */
5955 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5956 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5957 return false;
5958 if (dump_enabled_p ())
5959 dump_printf_loc (MSG_NOTE, vect_location,
5960 "proceeding using word mode.\n");
5963 /* Worthwhile without SIMD support? Check only during analysis. */
5964 if (!VECTOR_MODE_P (vec_mode)
5965 && !vec_stmt
5966 && !vect_worthwhile_without_simd_p (vinfo, code))
5968 if (dump_enabled_p ())
5969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5970 "not worthwhile without SIMD support.\n");
5971 return false;
5974 if (!vec_stmt) /* transformation not required. */
5976 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5977 DUMP_VECT_SCOPE ("vectorizable_operation");
5978 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5979 return true;
5982 /* Transform. */
5984 if (dump_enabled_p ())
5985 dump_printf_loc (MSG_NOTE, vect_location,
5986 "transform binary/unary operation.\n");
5988 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5989 vectors with unsigned elements, but the result is signed. So, we
5990 need to compute the MINUS_EXPR into vectype temporary and
5991 VIEW_CONVERT_EXPR it into the final vectype_out result. */
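/* An illustrative example added by the editor (schematic, not taken from
   the original sources): a scalar POINTER_DIFF_EXPR

       _3 = p_1 - q_2;

   therefore becomes, per vector copy,

       vect_tmp = vect_p - vect_q;
       vect__3 = VIEW_CONVERT_EXPR<vectype_out>(vect_tmp);

   where the subtraction is done in the unsigned element vectype and the
   view-convert is what the vec_cvt_dest handling below emits.  */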
5992 tree vec_cvt_dest = NULL_TREE;
5993 if (orig_code == POINTER_DIFF_EXPR)
5995 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5996 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5998 /* Handle def. */
5999 else
6000 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6002 /* In case the vectorization factor (VF) is bigger than the number
6003 of elements that we can fit in a vectype (nunits), we have to generate
6004 more than one vector stmt - i.e. - we need to "unroll" the
6005 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6006 from one copy of the vector stmt to the next, in the field
6007 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6008 stages to find the correct vector defs to be used when vectorizing
6009 stmts that use the defs of the current stmt. The example below
6010 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6011 we need to create 4 vectorized stmts):
6013 before vectorization:
6014 RELATED_STMT VEC_STMT
6015 S1: x = memref - -
6016 S2: z = x + 1 - -
6018 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6019 there):
6020 RELATED_STMT VEC_STMT
6021 VS1_0: vx0 = memref0 VS1_1 -
6022 VS1_1: vx1 = memref1 VS1_2 -
6023 VS1_2: vx2 = memref2 VS1_3 -
6024 VS1_3: vx3 = memref3 - -
6025 S1: x = load - VS1_0
6026 S2: z = x + 1 - -
6028 step2: vectorize stmt S2 (done here):
6029 To vectorize stmt S2 we first need to find the relevant vector
6030 def for the first operand 'x'. This is, as usual, obtained from
6031 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6032 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6033 relevant vector def 'vx0'. Having found 'vx0' we can generate
6034 the vector stmt VS2_0, and as usual, record it in the
6035 STMT_VINFO_VEC_STMT of stmt S2.
6036 When creating the second copy (VS2_1), we obtain the relevant vector
6037 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6038 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6039 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6040 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6041 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6042 chain of stmts and pointers:
6043 RELATED_STMT VEC_STMT
6044 VS1_0: vx0 = memref0 VS1_1 -
6045 VS1_1: vx1 = memref1 VS1_2 -
6046 VS1_2: vx2 = memref2 VS1_3 -
6047 VS1_3: vx3 = memref3 - -
6048 S1: x = load - VS1_0
6049 VS2_0: vz0 = vx0 + v1 VS2_1 -
6050 VS2_1: vz1 = vx1 + v1 VS2_2 -
6051 VS2_2: vz2 = vx2 + v1 VS2_3 -
6052 VS2_3: vz3 = vx3 + v1 - -
6053 S2: z = x + 1 - VS2_0 */
6055 prev_stmt_info = NULL;
6056 for (j = 0; j < ncopies; j++)
6058 /* Handle uses. */
6059 if (j == 0)
6061 if (op_type == binary_op)
6062 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6063 slp_node);
6064 else if (op_type == ternary_op)
6066 if (slp_node)
6068 auto_vec<tree> ops(3);
6069 ops.quick_push (op0);
6070 ops.quick_push (op1);
6071 ops.quick_push (op2);
6072 auto_vec<vec<tree> > vec_defs(3);
6073 vect_get_slp_defs (ops, slp_node, &vec_defs);
6074 vec_oprnds0 = vec_defs[0];
6075 vec_oprnds1 = vec_defs[1];
6076 vec_oprnds2 = vec_defs[2];
6078 else
6080 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6081 &vec_oprnds1, NULL);
6082 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6083 NULL, NULL);
6086 else
6087 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6088 slp_node);
6090 else
6092 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6093 if (op_type == ternary_op)
6095 tree vec_oprnd = vec_oprnds2.pop ();
6096 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6097 vec_oprnd));
6101 /* Arguments are ready. Create the new vector stmt. */
6102 stmt_vec_info new_stmt_info = NULL;
6103 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6105 vop1 = ((op_type == binary_op || op_type == ternary_op)
6106 ? vec_oprnds1[i] : NULL_TREE);
6107 vop2 = ((op_type == ternary_op)
6108 ? vec_oprnds2[i] : NULL_TREE);
6109 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6110 vop0, vop1, vop2);
6111 new_temp = make_ssa_name (vec_dest, new_stmt);
6112 gimple_assign_set_lhs (new_stmt, new_temp);
6113 new_stmt_info
6114 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6115 if (vec_cvt_dest)
6117 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6118 gassign *new_stmt
6119 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6120 new_temp);
6121 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6122 gimple_assign_set_lhs (new_stmt, new_temp);
6123 new_stmt_info
6124 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6126 if (slp_node)
6127 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6130 if (slp_node)
6131 continue;
6133 if (j == 0)
6134 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6135 else
6136 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6137 prev_stmt_info = new_stmt_info;
6140 vec_oprnds0.release ();
6141 vec_oprnds1.release ();
6142 vec_oprnds2.release ();
6144 return true;
6147 /* A helper function to ensure data reference DR_INFO's base alignment. */
6149 static void
6150 ensure_base_align (dr_vec_info *dr_info)
6152 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6153 return;
6155 if (dr_info->base_misaligned)
6157 tree base_decl = dr_info->base_decl;
6159 // We should only be able to increase the alignment of a base object if
6160 // we know what its new alignment should be at compile time.
6161 unsigned HOST_WIDE_INT align_base_to =
6162 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6164 if (decl_in_symtab_p (base_decl))
6165 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6166 else
6168 SET_DECL_ALIGN (base_decl, align_base_to);
6169 DECL_USER_ALIGN (base_decl) = 1;
6171 dr_info->base_misaligned = false;
6176 /* Function get_group_alias_ptr_type.
6178 Return the alias type for the group starting at FIRST_STMT_INFO. */
6180 static tree
6181 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6183 struct data_reference *first_dr, *next_dr;
6185 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6186 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6187 while (next_stmt_info)
6189 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6190 if (get_alias_set (DR_REF (first_dr))
6191 != get_alias_set (DR_REF (next_dr)))
6193 if (dump_enabled_p ())
6194 dump_printf_loc (MSG_NOTE, vect_location,
6195 "conflicting alias set types.\n");
6196 return ptr_type_node;
6198 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6200 return reference_alias_ptr_type (DR_REF (first_dr));
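/* Editor's note (illustrative, not from the original sources): if one
   member of an interleaved group is written through an "int *" view of a
   buffer and another through a "float *" view, the DR_REFs have different
   alias sets and the function above conservatively returns ptr_type_node.
   MEM_REFs later built with that alias pointer type get alias set zero
   and therefore conflict with every other access.  */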
6204 /* Function vectorizable_store.
6206 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
6207 that can be vectorized.
6208 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6209 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6210 Return true if STMT_INFO is vectorizable in this way. */
6212 static bool
6213 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6214 stmt_vec_info *vec_stmt, slp_tree slp_node,
6215 stmt_vector_for_cost *cost_vec)
6217 tree data_ref;
6218 tree op;
6219 tree vec_oprnd = NULL_TREE;
6220 tree elem_type;
6221 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6222 struct loop *loop = NULL;
6223 machine_mode vec_mode;
6224 tree dummy;
6225 enum dr_alignment_support alignment_support_scheme;
6226 enum vect_def_type rhs_dt = vect_unknown_def_type;
6227 enum vect_def_type mask_dt = vect_unknown_def_type;
6228 stmt_vec_info prev_stmt_info = NULL;
6229 tree dataref_ptr = NULL_TREE;
6230 tree dataref_offset = NULL_TREE;
6231 gimple *ptr_incr = NULL;
6232 int ncopies;
6233 int j;
6234 stmt_vec_info first_stmt_info;
6235 bool grouped_store;
6236 unsigned int group_size, i;
6237 vec<tree> oprnds = vNULL;
6238 vec<tree> result_chain = vNULL;
6239 tree offset = NULL_TREE;
6240 vec<tree> vec_oprnds = vNULL;
6241 bool slp = (slp_node != NULL);
6242 unsigned int vec_num;
6243 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6244 vec_info *vinfo = stmt_info->vinfo;
6245 tree aggr_type;
6246 gather_scatter_info gs_info;
6247 poly_uint64 vf;
6248 vec_load_store_type vls_type;
6249 tree ref_type;
6251 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6252 return false;
6254 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6255 && ! vec_stmt)
6256 return false;
6258 /* Is vectorizable store? */
6260 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6261 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6263 tree scalar_dest = gimple_assign_lhs (assign);
6264 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6265 && is_pattern_stmt_p (stmt_info))
6266 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6267 if (TREE_CODE (scalar_dest) != ARRAY_REF
6268 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6269 && TREE_CODE (scalar_dest) != INDIRECT_REF
6270 && TREE_CODE (scalar_dest) != COMPONENT_REF
6271 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6272 && TREE_CODE (scalar_dest) != REALPART_EXPR
6273 && TREE_CODE (scalar_dest) != MEM_REF)
6274 return false;
6276 else
6278 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
6279 if (!call || !gimple_call_internal_p (call))
6280 return false;
6282 internal_fn ifn = gimple_call_internal_fn (call);
6283 if (!internal_store_fn_p (ifn))
6284 return false;
6286 if (slp_node != NULL)
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6290 "SLP of masked stores not supported.\n");
6291 return false;
6294 int mask_index = internal_fn_mask_index (ifn);
6295 if (mask_index >= 0)
6297 mask = gimple_call_arg (call, mask_index);
6298 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
6299 &mask_vectype))
6300 return false;
6304 op = vect_get_store_rhs (stmt_info);
6306 /* Cannot have hybrid store SLP -- that would mean storing to the
6307 same location twice. */
6308 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6310 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6311 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6313 if (loop_vinfo)
6315 loop = LOOP_VINFO_LOOP (loop_vinfo);
6316 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6318 else
6319 vf = 1;
6321 /* Multiple types in SLP are handled by creating the appropriate number of
6322 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6323 case of SLP. */
6324 if (slp)
6325 ncopies = 1;
6326 else
6327 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6329 gcc_assert (ncopies >= 1);
6331 /* FORNOW. This restriction should be relaxed. */
6332 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
6334 if (dump_enabled_p ())
6335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6336 "multiple types in nested loop.\n");
6337 return false;
6340 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
6341 return false;
6343 elem_type = TREE_TYPE (vectype);
6344 vec_mode = TYPE_MODE (vectype);
6346 if (!STMT_VINFO_DATA_REF (stmt_info))
6347 return false;
6349 vect_memory_access_type memory_access_type;
6350 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
6351 &memory_access_type, &gs_info))
6352 return false;
6354 if (mask)
6356 if (memory_access_type == VMAT_CONTIGUOUS)
6358 if (!VECTOR_MODE_P (vec_mode)
6359 || !can_vec_mask_load_store_p (vec_mode,
6360 TYPE_MODE (mask_vectype), false))
6361 return false;
6363 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6364 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6366 if (dump_enabled_p ())
6367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6368 "unsupported access type for masked store.\n");
6369 return false;
6372 else
6374 /* FORNOW. In some cases can vectorize even if data-type not supported
6375 (e.g. - array initialization with 0). */
6376 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6377 return false;
6380 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
6381 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6382 && memory_access_type != VMAT_GATHER_SCATTER
6383 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6384 if (grouped_store)
6386 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6387 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6388 group_size = DR_GROUP_SIZE (first_stmt_info);
6390 else
6392 first_stmt_info = stmt_info;
6393 first_dr_info = dr_info;
6394 group_size = vec_num = 1;
6397 if (!vec_stmt) /* transformation not required. */
6399 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6401 if (loop_vinfo
6402 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6403 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6404 memory_access_type, &gs_info);
6406 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6407 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6408 vls_type, slp_node, cost_vec);
6409 return true;
6411 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6413 /* Transform. */
6415 ensure_base_align (dr_info);
6417 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6419 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6420 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6421 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6422 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6423 edge pe = loop_preheader_edge (loop);
6424 gimple_seq seq;
6425 basic_block new_bb;
6426 enum { NARROW, NONE, WIDEN } modifier;
6427 poly_uint64 scatter_off_nunits
6428 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6430 if (known_eq (nunits, scatter_off_nunits))
6431 modifier = NONE;
6432 else if (known_eq (nunits * 2, scatter_off_nunits))
6434 modifier = WIDEN;
6436 /* Currently gathers and scatters are only supported for
6437 fixed-length vectors. */
6438 unsigned int count = scatter_off_nunits.to_constant ();
6439 vec_perm_builder sel (count, count, 1);
6440 for (i = 0; i < (unsigned int) count; ++i)
6441 sel.quick_push (i | (count / 2));
6443 vec_perm_indices indices (sel, 1, count);
6444 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6445 indices);
6446 gcc_assert (perm_mask != NULL_TREE);
6448 else if (known_eq (nunits, scatter_off_nunits * 2))
6450 modifier = NARROW;
6452 /* Currently gathers and scatters are only supported for
6453 fixed-length vectors. */
6454 unsigned int count = nunits.to_constant ();
6455 vec_perm_builder sel (count, count, 1);
6456 for (i = 0; i < (unsigned int) count; ++i)
6457 sel.quick_push (i | (count / 2));
6459 vec_perm_indices indices (sel, 2, count);
6460 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6461 gcc_assert (perm_mask != NULL_TREE);
6462 ncopies *= 2;
6464 else
6465 gcc_unreachable ();
6467 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6468 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6469 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6470 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6471 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6472 scaletype = TREE_VALUE (arglist);
6474 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6475 && TREE_CODE (rettype) == VOID_TYPE);
6477 ptr = fold_convert (ptrtype, gs_info.base);
6478 if (!is_gimple_min_invariant (ptr))
6480 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6481 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6482 gcc_assert (!new_bb);
6485 /* Currently we support only unconditional scatter stores,
6486 so mask should be all ones. */
6487 mask = build_int_cst (masktype, -1);
6488 mask = vect_init_vector (stmt_info, mask, masktype, NULL);
6490 scale = build_int_cst (scaletype, gs_info.scale);
6492 prev_stmt_info = NULL;
6493 for (j = 0; j < ncopies; ++j)
6495 if (j == 0)
6497 src = vec_oprnd1
6498 = vect_get_vec_def_for_operand (op, stmt_info);
6499 op = vec_oprnd0
6500 = vect_get_vec_def_for_operand (gs_info.offset, stmt_info);
6502 else if (modifier != NONE && (j & 1))
6504 if (modifier == WIDEN)
6506 src = vec_oprnd1
6507 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
6508 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6509 stmt_info, gsi);
6511 else if (modifier == NARROW)
6513 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6514 stmt_info, gsi);
6515 op = vec_oprnd0
6516 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
6518 else
6519 gcc_unreachable ();
6521 else
6523 src = vec_oprnd1
6524 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
6525 op = vec_oprnd0
6526 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
6529 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6531 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6532 TYPE_VECTOR_SUBPARTS (srctype)));
6533 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6534 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6535 gassign *new_stmt
6536 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6537 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6538 src = var;
6541 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6543 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6544 TYPE_VECTOR_SUBPARTS (idxtype)));
6545 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6546 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6547 gassign *new_stmt
6548 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6549 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6550 op = var;
6553 gcall *new_stmt
6554 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6555 stmt_vec_info new_stmt_info
6556 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6558 if (prev_stmt_info == NULL)
6559 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6560 else
6561 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6562 prev_stmt_info = new_stmt_info;
6564 return true;
6567 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6568 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
6570 if (grouped_store)
6572 /* FORNOW */
6573 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
6575 /* We vectorize all the stmts of the interleaving group when we
6576 reach the last stmt in the group. */
6577 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6578 < DR_GROUP_SIZE (first_stmt_info)
6579 && !slp)
6581 *vec_stmt = NULL;
6582 return true;
6585 if (slp)
6587 grouped_store = false;
6588 /* VEC_NUM is the number of vect stmts to be created for this
6589 group. */
6590 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6591 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6592 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6593 == first_stmt_info);
6594 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6595 op = vect_get_store_rhs (first_stmt_info);
6597 else
6598 /* VEC_NUM is the number of vect stmts to be created for this
6599 group. */
6600 vec_num = group_size;
6602 ref_type = get_group_alias_ptr_type (first_stmt_info);
6604 else
6605 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
6607 if (dump_enabled_p ())
6608 dump_printf_loc (MSG_NOTE, vect_location,
6609 "transform store. ncopies = %d\n", ncopies);
6611 if (memory_access_type == VMAT_ELEMENTWISE
6612 || memory_access_type == VMAT_STRIDED_SLP)
6614 gimple_stmt_iterator incr_gsi;
6615 bool insert_after;
6616 gimple *incr;
6617 tree offvar;
6618 tree ivstep;
6619 tree running_off;
6620 tree stride_base, stride_step, alias_off;
6621 tree vec_oprnd;
6622 unsigned int g;
6623 /* Checked by get_load_store_type. */
6624 unsigned int const_nunits = nunits.to_constant ();
6626 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6627 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
6629 stride_base
6630 = fold_build_pointer_plus
6631 (DR_BASE_ADDRESS (first_dr_info->dr),
6632 size_binop (PLUS_EXPR,
6633 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
6634 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
6635 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
6637 /* For a store with loop-invariant (but other than power-of-2)
6638 stride (i.e. not a grouped access) like so:
6640 for (i = 0; i < n; i += stride)
6641 array[i] = ...;
6643 we generate a new induction variable and new stores from
6644 the components of the (vectorized) rhs:
6646 for (j = 0; ; j += VF*stride)
6647 vectemp = ...;
6648 tmp1 = vectemp[0];
6649 array[j] = tmp1;
6650 tmp2 = vectemp[1];
6651 array[j + stride] = tmp2;
6655 unsigned nstores = const_nunits;
6656 unsigned lnel = 1;
6657 tree ltype = elem_type;
6658 tree lvectype = vectype;
6659 if (slp)
6661 if (group_size < const_nunits
6662 && const_nunits % group_size == 0)
6664 nstores = const_nunits / group_size;
6665 lnel = group_size;
6666 ltype = build_vector_type (elem_type, group_size);
6667 lvectype = vectype;
6669 /* First check if vec_extract optab doesn't support extraction
6670 of vector elts directly. */
6671 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6672 machine_mode vmode;
6673 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6674 || !VECTOR_MODE_P (vmode)
6675 || !targetm.vector_mode_supported_p (vmode)
6676 || (convert_optab_handler (vec_extract_optab,
6677 TYPE_MODE (vectype), vmode)
6678 == CODE_FOR_nothing))
6680 /* Try to avoid emitting an extract of vector elements
6681 by performing the extracts using an integer type of the
6682 same size, extracting from a vector of those and then
6683 re-interpreting it as the original vector type if
6684 supported. */
6685 unsigned lsize
6686 = group_size * GET_MODE_BITSIZE (elmode);
6687 unsigned int lnunits = const_nunits / group_size;
6688 /* If we can't construct such a vector fall back to
6689 element extracts from the original vector type and
6690 element size stores. */
6691 if (int_mode_for_size (lsize, 0).exists (&elmode)
6692 && mode_for_vector (elmode, lnunits).exists (&vmode)
6693 && VECTOR_MODE_P (vmode)
6694 && targetm.vector_mode_supported_p (vmode)
6695 && (convert_optab_handler (vec_extract_optab,
6696 vmode, elmode)
6697 != CODE_FOR_nothing))
6699 nstores = lnunits;
6700 lnel = group_size;
6701 ltype = build_nonstandard_integer_type (lsize, 1);
6702 lvectype = build_vector_type (ltype, nstores);
6704 /* Else fall back to vector extraction anyway.
6705 Fewer stores are more important than avoiding spilling
6706 of the vector we extract from. Compared to the
6707 construction case in vectorizable_load no store-forwarding
6708 issue exists here for reasonable archs. */
6711 else if (group_size >= const_nunits
6712 && group_size % const_nunits == 0)
6714 nstores = 1;
6715 lnel = const_nunits;
6716 ltype = vectype;
6717 lvectype = vectype;
6719 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6720 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6723 ivstep = stride_step;
6724 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6725 build_int_cst (TREE_TYPE (ivstep), vf));
6727 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6729 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6730 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6731 create_iv (stride_base, ivstep, NULL,
6732 loop, &incr_gsi, insert_after,
6733 &offvar, NULL);
6734 incr = gsi_stmt (incr_gsi);
6735 loop_vinfo->add_stmt (incr);
6737 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6739 prev_stmt_info = NULL;
6740 alias_off = build_int_cst (ref_type, 0);
6741 stmt_vec_info next_stmt_info = first_stmt_info;
6742 for (g = 0; g < group_size; g++)
6744 running_off = offvar;
6745 if (g)
6747 tree size = TYPE_SIZE_UNIT (ltype);
6748 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6749 size);
6750 tree newoff = copy_ssa_name (running_off, NULL);
6751 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6752 running_off, pos);
6753 vect_finish_stmt_generation (stmt_info, incr, gsi);
6754 running_off = newoff;
6756 unsigned int group_el = 0;
6757 unsigned HOST_WIDE_INT
6758 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6759 for (j = 0; j < ncopies; j++)
6761 /* We've set op and dt above, from vect_get_store_rhs,
6762 and first_stmt_info == stmt_info. */
6763 if (j == 0)
6765 if (slp)
6767 vect_get_vec_defs (op, NULL_TREE, stmt_info,
6768 &vec_oprnds, NULL, slp_node);
6769 vec_oprnd = vec_oprnds[0];
6771 else
6773 op = vect_get_store_rhs (next_stmt_info);
6774 vec_oprnd = vect_get_vec_def_for_operand
6775 (op, next_stmt_info);
6778 else
6780 if (slp)
6781 vec_oprnd = vec_oprnds[j];
6782 else
6783 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
6784 vec_oprnd);
6786 /* Pun the vector to extract from if necessary. */
6787 if (lvectype != vectype)
6789 tree tem = make_ssa_name (lvectype);
6790 gimple *pun
6791 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6792 lvectype, vec_oprnd));
6793 vect_finish_stmt_generation (stmt_info, pun, gsi);
6794 vec_oprnd = tem;
6796 for (i = 0; i < nstores; i++)
6798 tree newref, newoff;
6799 gimple *incr, *assign;
6800 tree size = TYPE_SIZE (ltype);
6801 /* Extract the i'th component. */
6802 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6803 bitsize_int (i), size);
6804 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6805 size, pos);
6807 elem = force_gimple_operand_gsi (gsi, elem, true,
6808 NULL_TREE, true,
6809 GSI_SAME_STMT);
6811 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6812 group_el * elsz);
6813 newref = build2 (MEM_REF, ltype,
6814 running_off, this_off);
6815 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
6817 /* And store it to *running_off. */
6818 assign = gimple_build_assign (newref, elem);
6819 stmt_vec_info assign_info
6820 = vect_finish_stmt_generation (stmt_info, assign, gsi);
6822 group_el += lnel;
6823 if (! slp
6824 || group_el == group_size)
6826 newoff = copy_ssa_name (running_off, NULL);
6827 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6828 running_off, stride_step);
6829 vect_finish_stmt_generation (stmt_info, incr, gsi);
6831 running_off = newoff;
6832 group_el = 0;
6834 if (g == group_size - 1
6835 && !slp)
6837 if (j == 0 && i == 0)
6838 STMT_VINFO_VEC_STMT (stmt_info)
6839 = *vec_stmt = assign_info;
6840 else
6841 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6842 prev_stmt_info = assign_info;
6846 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6847 if (slp)
6848 break;
6851 vec_oprnds.release ();
6852 return true;
6855 auto_vec<tree> dr_chain (group_size);
6856 oprnds.create (group_size);
6858 alignment_support_scheme
6859 = vect_supportable_dr_alignment (first_dr_info, false);
6860 gcc_assert (alignment_support_scheme);
6861 vec_loop_masks *loop_masks
6862 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6863 ? &LOOP_VINFO_MASKS (loop_vinfo)
6864 : NULL);
6865 /* Targets with store-lane instructions must not require explicit
6866 realignment. vect_supportable_dr_alignment always returns either
6867 dr_aligned or dr_unaligned_supported for masked operations. */
6868 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6869 && !mask
6870 && !loop_masks)
6871 || alignment_support_scheme == dr_aligned
6872 || alignment_support_scheme == dr_unaligned_supported);
6874 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6875 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6876 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6878 tree bump;
6879 tree vec_offset = NULL_TREE;
6880 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6882 aggr_type = NULL_TREE;
6883 bump = NULL_TREE;
6885 else if (memory_access_type == VMAT_GATHER_SCATTER)
6887 aggr_type = elem_type;
6888 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
6889 &bump, &vec_offset);
6891 else
6893 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6894 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6895 else
6896 aggr_type = vectype;
6897 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
6898 memory_access_type);
6901 if (mask)
6902 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6904 /* In case the vectorization factor (VF) is bigger than the number
6905 of elements that we can fit in a vectype (nunits), we have to generate
6906 more than one vector stmt - i.e. - we need to "unroll" the
6907 vector stmt by a factor VF/nunits. For more details see documentation in
6908 vect_get_vec_def_for_stmt_copy. */
6910 /* In case of interleaving (non-unit grouped access):
6912 S1: &base + 2 = x2
6913 S2: &base = x0
6914 S3: &base + 1 = x1
6915 S4: &base + 3 = x3
6917 We create vectorized stores starting from the base address (the access of
6918 the first stmt in the chain, S2 in the above example) when the last store
6919 stmt of the chain (S4) is reached:
6921 VS1: &base = vx2
6922 VS2: &base + vec_size*1 = vx0
6923 VS3: &base + vec_size*2 = vx1
6924 VS4: &base + vec_size*3 = vx3
6926 Then permutation statements are generated:
6928 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6929 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6932 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6933 (the order of the data-refs in the output of vect_permute_store_chain
6934 corresponds to the order of scalar stmts in the interleaving chain - see
6935 the documentation of vect_permute_store_chain()).
6937 In case of both multiple types and interleaving, the above vector stores and
6938 permutation stmts are created for every copy. The result vector stmts are
6939 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6940 STMT_VINFO_RELATED_STMT for the next copies.
6943 prev_stmt_info = NULL;
6944 tree vec_mask = NULL_TREE;
6945 for (j = 0; j < ncopies; j++)
6947 stmt_vec_info new_stmt_info;
6948 if (j == 0)
6950 if (slp)
6952 /* Get vectorized arguments for SLP_NODE. */
6953 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
6954 NULL, slp_node);
6956 vec_oprnd = vec_oprnds[0];
6958 else
6960 /* For interleaved stores we collect vectorized defs for all the
6961 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6962 used as an input to vect_permute_store_chain(), and OPRNDS as
6963 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6965 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6966 OPRNDS are of size 1. */
6967 stmt_vec_info next_stmt_info = first_stmt_info;
6968 for (i = 0; i < group_size; i++)
6970 /* Since gaps are not supported for interleaved stores,
6971 DR_GROUP_SIZE is the exact number of stmts in the chain.
6972 Therefore, NEXT_STMT_INFO can't be NULL. In case
6973 there is no interleaving, DR_GROUP_SIZE is 1,
6974 and only one iteration of the loop will be executed. */
6975 op = vect_get_store_rhs (next_stmt_info);
6976 vec_oprnd = vect_get_vec_def_for_operand
6977 (op, next_stmt_info);
6978 dr_chain.quick_push (vec_oprnd);
6979 oprnds.quick_push (vec_oprnd);
6980 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6982 if (mask)
6983 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
6984 mask_vectype);
6987 /* We should have caught mismatched types earlier. */
6988 gcc_assert (useless_type_conversion_p (vectype,
6989 TREE_TYPE (vec_oprnd)));
6990 bool simd_lane_access_p
6991 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6992 if (simd_lane_access_p
6993 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
6994 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
6995 && integer_zerop (DR_OFFSET (first_dr_info->dr))
6996 && integer_zerop (DR_INIT (first_dr_info->dr))
6997 && alias_sets_conflict_p (get_alias_set (aggr_type),
6998 get_alias_set (TREE_TYPE (ref_type))))
7000 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7001 dataref_offset = build_int_cst (ref_type, 0);
7003 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7004 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
7005 &dataref_ptr, &vec_offset);
7006 else
7007 dataref_ptr
7008 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
7009 simd_lane_access_p ? loop : NULL,
7010 offset, &dummy, gsi, &ptr_incr,
7011 simd_lane_access_p, NULL_TREE, bump);
7013 else
7015 /* For interleaved stores we created vectorized defs for all the
7016 defs stored in OPRNDS in the previous iteration (previous copy).
7017 DR_CHAIN is then used as an input to vect_permute_store_chain(),
7018 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7019 next copy.
7020 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7021 OPRNDS are of size 1. */
7022 for (i = 0; i < group_size; i++)
7024 op = oprnds[i];
7025 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
7026 dr_chain[i] = vec_oprnd;
7027 oprnds[i] = vec_oprnd;
7029 if (mask)
7030 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
7031 if (dataref_offset)
7032 dataref_offset
7033 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7034 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7035 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
7036 else
7037 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7038 stmt_info, bump);
7041 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7043 tree vec_array;
7045 /* Get an array into which we can store the individual vectors. */
7046 vec_array = create_vector_array (vectype, vec_num);
7048 /* Invalidate the current contents of VEC_ARRAY. This should
7049 become an RTL clobber too, which prevents the vector registers
7050 from being upward-exposed. */
7051 vect_clobber_variable (stmt_info, gsi, vec_array);
7053 /* Store the individual vectors into the array. */
7054 for (i = 0; i < vec_num; i++)
7056 vec_oprnd = dr_chain[i];
7057 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
7060 tree final_mask = NULL;
7061 if (loop_masks)
7062 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7063 vectype, j);
7064 if (vec_mask)
7065 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7066 vec_mask, gsi);
7068 gcall *call;
7069 if (final_mask)
7071 /* Emit:
7072 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7073 VEC_ARRAY). */
7074 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7075 tree alias_ptr = build_int_cst (ref_type, align);
7076 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7077 dataref_ptr, alias_ptr,
7078 final_mask, vec_array);
7080 else
7082 /* Emit:
7083 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7084 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7085 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7086 vec_array);
7087 gimple_call_set_lhs (call, data_ref);
7089 gimple_call_set_nothrow (call, true);
7090 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
7092 /* Record that VEC_ARRAY is now dead. */
7093 vect_clobber_variable (stmt_info, gsi, vec_array);
7095 else
7097 new_stmt_info = NULL;
7098 if (grouped_store)
7100 if (j == 0)
7101 result_chain.create (group_size);
7102 /* Permute. */
7103 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
7104 &result_chain);
7107 stmt_vec_info next_stmt_info = first_stmt_info;
7108 for (i = 0; i < vec_num; i++)
7110 unsigned misalign;
7111 unsigned HOST_WIDE_INT align;
7113 tree final_mask = NULL_TREE;
7114 if (loop_masks)
7115 final_mask = vect_get_loop_mask (gsi, loop_masks,
7116 vec_num * ncopies,
7117 vectype, vec_num * j + i);
7118 if (vec_mask)
7119 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7120 vec_mask, gsi);
7122 if (memory_access_type == VMAT_GATHER_SCATTER)
7124 tree scale = size_int (gs_info.scale);
7125 gcall *call;
7126 if (loop_masks)
7127 call = gimple_build_call_internal
7128 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7129 scale, vec_oprnd, final_mask);
7130 else
7131 call = gimple_build_call_internal
7132 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7133 scale, vec_oprnd);
7134 gimple_call_set_nothrow (call, true);
7135 new_stmt_info
7136 = vect_finish_stmt_generation (stmt_info, call, gsi);
7137 break;
7140 if (i > 0)
7141 /* Bump the vector pointer. */
7142 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7143 stmt_info, bump);
7145 if (slp)
7146 vec_oprnd = vec_oprnds[i];
7147 else if (grouped_store)
7148 /* For grouped stores vectorized defs are interleaved in
7149 vect_permute_store_chain(). */
7150 vec_oprnd = result_chain[i];
7152 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
7153 if (aligned_access_p (first_dr_info))
7154 misalign = 0;
7155 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7157 align = dr_alignment (vect_dr_behavior (first_dr_info));
7158 misalign = 0;
7160 else
7161 misalign = DR_MISALIGNMENT (first_dr_info);
7162 if (dataref_offset == NULL_TREE
7163 && TREE_CODE (dataref_ptr) == SSA_NAME)
7164 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7165 misalign);
7167 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7169 tree perm_mask = perm_mask_for_reverse (vectype);
7170 tree perm_dest = vect_create_destination_var
7171 (vect_get_store_rhs (stmt_info), vectype);
7172 tree new_temp = make_ssa_name (perm_dest);
7174 /* Generate the permute statement. */
7175 gimple *perm_stmt
7176 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7177 vec_oprnd, perm_mask);
7178 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7180 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7181 vec_oprnd = new_temp;
7184 /* Arguments are ready. Create the new vector stmt. */
7185 if (final_mask)
7187 align = least_bit_hwi (misalign | align);
7188 tree ptr = build_int_cst (ref_type, align);
7189 gcall *call
7190 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7191 dataref_ptr, ptr,
7192 final_mask, vec_oprnd);
7193 gimple_call_set_nothrow (call, true);
7194 new_stmt_info
7195 = vect_finish_stmt_generation (stmt_info, call, gsi);
7197 else
7199 data_ref = fold_build2 (MEM_REF, vectype,
7200 dataref_ptr,
7201 dataref_offset
7202 ? dataref_offset
7203 : build_int_cst (ref_type, 0));
7204 if (aligned_access_p (first_dr_info))
7206 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7207 TREE_TYPE (data_ref)
7208 = build_aligned_type (TREE_TYPE (data_ref),
7209 align * BITS_PER_UNIT);
7210 else
7211 TREE_TYPE (data_ref)
7212 = build_aligned_type (TREE_TYPE (data_ref),
7213 TYPE_ALIGN (elem_type));
7214 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7215 gassign *new_stmt
7216 = gimple_build_assign (data_ref, vec_oprnd);
7217 new_stmt_info
7218 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7221 if (slp)
7222 continue;
7224 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7225 if (!next_stmt_info)
7226 break;
7229 if (!slp)
7231 if (j == 0)
7232 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7233 else
7234 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7235 prev_stmt_info = new_stmt_info;
7239 oprnds.release ();
7240 result_chain.release ();
7241 vec_oprnds.release ();
7243 return true;
7246 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7247 VECTOR_CST mask. No checks are made that the target platform supports the
7248 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7249 vect_gen_perm_mask_checked. */
7251 tree
7252 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7254 tree mask_type;
7256 poly_uint64 nunits = sel.length ();
7257 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7259 mask_type = build_vector_type (ssizetype, nunits);
7260 return vec_perm_indices_to_tree (mask_type, sel);
7263 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7264 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7266 tree
7267 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7269 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7270 return vect_gen_perm_mask_any (vectype, sel);
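/* A sketch of the mask encoding, assuming a four-element vectype:
   reversing a V4SI vector uses SEL = { 3, 2, 1, 0 }, which becomes a
   VECTOR_CST of type vector(4) ssizetype; element I of the
   VEC_PERM_EXPR result is taken from input element SEL[I].  */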
7273 /* Given vector variables X and Y that were generated for the scalar
7274 STMT_INFO, generate instructions to permute the vector elements of X and Y
7275 using permutation mask MASK_VEC, insert them at *GSI and return the
7276 permuted vector variable.
7278 static tree
7279 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
7280 gimple_stmt_iterator *gsi)
7282 tree vectype = TREE_TYPE (x);
7283 tree perm_dest, data_ref;
7284 gimple *perm_stmt;
7286 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
7287 if (TREE_CODE (scalar_dest) == SSA_NAME)
7288 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7289 else
7290 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7291 data_ref = make_ssa_name (perm_dest);
7293 /* Generate the permute statement. */
7294 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7295 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7297 return data_ref;
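/* A sketch of the statement emitted above, assuming V4SI inputs
   X = { x0, x1, x2, x3 }, Y = { y0, y1, y2, y3 } and
   MASK_VEC = { 0, 2, 4, 6 }:
     tmp = VEC_PERM_EXPR <X, Y, { 0, 2, 4, 6 }>;
   selects the even elements of the concatenation of X and Y,
   i.e. tmp = { x0, x2, y0, y2 }.  */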
7300 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7301 inserting them on the loop's preheader edge.  Returns true if we
7302 were successful in doing so (and thus STMT_INFO can then be moved),
7303 otherwise returns false. */
7305 static bool
7306 hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
7308 ssa_op_iter i;
7309 tree op;
7310 bool any = false;
7312 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7314 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7315 if (!gimple_nop_p (def_stmt)
7316 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7318 /* Make sure we don't need to recurse.  While we could do
7319 so in simple cases, when there are more complex use webs
7320 we don't have an easy way to preserve stmt order to fulfil
7321 dependencies within them. */
7322 tree op2;
7323 ssa_op_iter i2;
7324 if (gimple_code (def_stmt) == GIMPLE_PHI)
7325 return false;
7326 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7328 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7329 if (!gimple_nop_p (def_stmt2)
7330 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7331 return false;
7333 any = true;
7337 if (!any)
7338 return true;
7340 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7342 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7343 if (!gimple_nop_p (def_stmt)
7344 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7346 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7347 gsi_remove (&gsi, false);
7348 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7352 return true;
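/* A sketch of the transformation, assuming the load below is the
   invariant stmt being considered:

     loop:                              preheader:
       a_1 = b_2 + 1;            =>       a_1 = b_2 + 1;
       x_3 = MEM[base + a_1];           loop:
                                          x_3 = MEM[base + a_1];

   The definitions feeding the load are moved onto the preheader edge
   so that vectorizable_load can then hoist the load itself.  */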
7355 /* vectorizable_load.
7357 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
7358 that can be vectorized.
7359 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7360 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7361 Return true if STMT_INFO is vectorizable in this way. */
7363 static bool
7364 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7365 stmt_vec_info *vec_stmt, slp_tree slp_node,
7366 slp_instance slp_node_instance,
7367 stmt_vector_for_cost *cost_vec)
7369 tree scalar_dest;
7370 tree vec_dest = NULL;
7371 tree data_ref = NULL;
7372 stmt_vec_info prev_stmt_info;
7373 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7374 struct loop *loop = NULL;
7375 struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
7376 bool nested_in_vect_loop = false;
7377 tree elem_type;
7378 tree new_temp;
7379 machine_mode mode;
7380 tree dummy;
7381 enum dr_alignment_support alignment_support_scheme;
7382 tree dataref_ptr = NULL_TREE;
7383 tree dataref_offset = NULL_TREE;
7384 gimple *ptr_incr = NULL;
7385 int ncopies;
7386 int i, j;
7387 unsigned int group_size;
7388 poly_uint64 group_gap_adj;
7389 tree msq = NULL_TREE, lsq;
7390 tree offset = NULL_TREE;
7391 tree byte_offset = NULL_TREE;
7392 tree realignment_token = NULL_TREE;
7393 gphi *phi = NULL;
7394 vec<tree> dr_chain = vNULL;
7395 bool grouped_load = false;
7396 stmt_vec_info first_stmt_info;
7397 stmt_vec_info first_stmt_info_for_drptr = NULL;
7398 bool compute_in_loop = false;
7399 struct loop *at_loop;
7400 int vec_num;
7401 bool slp = (slp_node != NULL);
7402 bool slp_perm = false;
7403 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7404 poly_uint64 vf;
7405 tree aggr_type;
7406 gather_scatter_info gs_info;
7407 vec_info *vinfo = stmt_info->vinfo;
7408 tree ref_type;
7409 enum vect_def_type mask_dt = vect_unknown_def_type;
7411 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7412 return false;
7414 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7415 && ! vec_stmt)
7416 return false;
7418 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7419 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7421 scalar_dest = gimple_assign_lhs (assign);
7422 if (TREE_CODE (scalar_dest) != SSA_NAME)
7423 return false;
7425 tree_code code = gimple_assign_rhs_code (assign);
7426 if (code != ARRAY_REF
7427 && code != BIT_FIELD_REF
7428 && code != INDIRECT_REF
7429 && code != COMPONENT_REF
7430 && code != IMAGPART_EXPR
7431 && code != REALPART_EXPR
7432 && code != MEM_REF
7433 && TREE_CODE_CLASS (code) != tcc_declaration)
7434 return false;
7436 else
7438 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7439 if (!call || !gimple_call_internal_p (call))
7440 return false;
7442 internal_fn ifn = gimple_call_internal_fn (call);
7443 if (!internal_load_fn_p (ifn))
7444 return false;
7446 scalar_dest = gimple_call_lhs (call);
7447 if (!scalar_dest)
7448 return false;
7450 if (slp_node != NULL)
7452 if (dump_enabled_p ())
7453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7454 "SLP of masked loads not supported.\n");
7455 return false;
7458 int mask_index = internal_fn_mask_index (ifn);
7459 if (mask_index >= 0)
7461 mask = gimple_call_arg (call, mask_index);
7462 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7463 &mask_vectype))
7464 return false;
7468 if (!STMT_VINFO_DATA_REF (stmt_info))
7469 return false;
7471 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7472 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7474 if (loop_vinfo)
7476 loop = LOOP_VINFO_LOOP (loop_vinfo);
7477 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
7478 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7480 else
7481 vf = 1;
7483 /* Multiple types in SLP are handled by creating the appropriate number of
7484 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7485 case of SLP. */
7486 if (slp)
7487 ncopies = 1;
7488 else
7489 ncopies = vect_get_num_copies (loop_vinfo, vectype);
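/* For example (illustrative only): with a vectorization factor of 8 and
   a four-element vectype, vect_get_num_copies returns 2, i.e. two
   vector loads are generated for each scalar load.  */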
7491 gcc_assert (ncopies >= 1);
7493 /* FORNOW. This restriction should be relaxed. */
7494 if (nested_in_vect_loop && ncopies > 1)
7496 if (dump_enabled_p ())
7497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7498 "multiple types in nested loop.\n");
7499 return false;
7502 /* Invalidate assumptions made by dependence analysis when vectorization
7503 on the unrolled body effectively re-orders stmts. */
7504 if (ncopies > 1
7505 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7506 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7507 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7509 if (dump_enabled_p ())
7510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7511 "cannot perform implicit CSE when unrolling "
7512 "with negative dependence distance\n");
7513 return false;
7516 elem_type = TREE_TYPE (vectype);
7517 mode = TYPE_MODE (vectype);
7519 /* FORNOW.  In some cases we can vectorize even if the data type is not
7520 supported (e.g. data copies). */
7521 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7523 if (dump_enabled_p ())
7524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7525 "Aligned load, but unsupported type.\n");
7526 return false;
7529 /* Check if the load is a part of an interleaving chain. */
7530 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7532 grouped_load = true;
7533 /* FORNOW */
7534 gcc_assert (!nested_in_vect_loop);
7535 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7537 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7538 group_size = DR_GROUP_SIZE (first_stmt_info);
7540 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7541 slp_perm = true;
7543 /* Invalidate assumptions made by dependence analysis when vectorization
7544 on the unrolled body effectively re-orders stmts. */
7545 if (!PURE_SLP_STMT (stmt_info)
7546 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7547 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7548 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7550 if (dump_enabled_p ())
7551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7552 "cannot perform implicit CSE when performing "
7553 "group loads with negative dependence distance\n");
7554 return false;
7557 /* Similarly, when the stmt is a load that is both part of an SLP
7558 instance and a loop-vectorized stmt via the same-dr mechanism,
7559 we have to give up. */
7560 if (DR_GROUP_SAME_DR_STMT (stmt_info)
7561 && (STMT_SLP_TYPE (stmt_info)
7562 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info))))
7564 if (dump_enabled_p ())
7565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7566 "conflicting SLP types for CSEd load\n");
7567 return false;
7570 else
7571 group_size = 1;
7573 vect_memory_access_type memory_access_type;
7574 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
7575 &memory_access_type, &gs_info))
7576 return false;
7578 if (mask)
7580 if (memory_access_type == VMAT_CONTIGUOUS)
7582 machine_mode vec_mode = TYPE_MODE (vectype);
7583 if (!VECTOR_MODE_P (vec_mode)
7584 || !can_vec_mask_load_store_p (vec_mode,
7585 TYPE_MODE (mask_vectype), true))
7586 return false;
7588 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7589 && memory_access_type != VMAT_GATHER_SCATTER)
7591 if (dump_enabled_p ())
7592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7593 "unsupported access type for masked load.\n");
7594 return false;
7598 if (!vec_stmt) /* transformation not required. */
7600 if (!slp)
7601 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7603 if (loop_vinfo
7604 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7605 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7606 memory_access_type, &gs_info);
7608 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7609 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7610 slp_node_instance, slp_node, cost_vec);
7611 return true;
7614 if (!slp)
7615 gcc_assert (memory_access_type
7616 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7618 if (dump_enabled_p ())
7619 dump_printf_loc (MSG_NOTE, vect_location,
7620 "transform load. ncopies = %d\n", ncopies);
7622 /* Transform. */
7624 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7625 ensure_base_align (dr_info);
7627 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7629 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
7630 return true;
7633 if (memory_access_type == VMAT_INVARIANT)
7635 gcc_assert (!grouped_load && !mask && !bb_vinfo);
7636 /* If we have versioned for aliasing or the loop doesn't
7637 have any data dependencies that would preclude this,
7638 then we are sure this is a loop invariant load and
7639 thus we can insert it on the preheader edge. */
7640 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7641 && !nested_in_vect_loop
7642 && hoist_defs_of_uses (stmt_info, loop));
7643 if (hoist_p)
7645 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
7646 if (dump_enabled_p ())
7647 dump_printf_loc (MSG_NOTE, vect_location,
7648 "hoisting out of the vectorized loop: %G", stmt);
7649 scalar_dest = copy_ssa_name (scalar_dest);
7650 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
7651 gsi_insert_on_edge_immediate
7652 (loop_preheader_edge (loop),
7653 gimple_build_assign (scalar_dest, rhs));
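/* As a sketch (assuming a V4SI vectype): the scalar load *p is emitted
   once on the preheader edge as
     _tmp = *p;
   and each copy below broadcasts it with vect_init_vector,
   e.g. { _tmp, _tmp, _tmp, _tmp }.  */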
7655 /* These copies are all equivalent, but currently the representation
7656 requires a separate STMT_VINFO_VEC_STMT for each one. */
7657 prev_stmt_info = NULL;
7658 gimple_stmt_iterator gsi2 = *gsi;
7659 gsi_next (&gsi2);
7660 for (j = 0; j < ncopies; j++)
7662 stmt_vec_info new_stmt_info;
7663 if (hoist_p)
7665 new_temp = vect_init_vector (stmt_info, scalar_dest,
7666 vectype, NULL);
7667 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
7668 new_stmt_info = vinfo->add_stmt (new_stmt);
7670 else
7672 new_temp = vect_init_vector (stmt_info, scalar_dest,
7673 vectype, &gsi2);
7674 new_stmt_info = vinfo->lookup_def (new_temp);
7676 if (slp)
7677 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7678 else if (j == 0)
7679 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7680 else
7681 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7682 prev_stmt_info = new_stmt_info;
7684 return true;
7687 if (memory_access_type == VMAT_ELEMENTWISE
7688 || memory_access_type == VMAT_STRIDED_SLP)
7690 gimple_stmt_iterator incr_gsi;
7691 bool insert_after;
7692 gimple *incr;
7693 tree offvar;
7694 tree ivstep;
7695 tree running_off;
7696 vec<constructor_elt, va_gc> *v = NULL;
7697 tree stride_base, stride_step, alias_off;
7698 /* Checked by get_load_store_type. */
7699 unsigned int const_nunits = nunits.to_constant ();
7700 unsigned HOST_WIDE_INT cst_offset = 0;
7702 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7703 gcc_assert (!nested_in_vect_loop);
7705 if (grouped_load)
7707 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7708 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7710 else
7712 first_stmt_info = stmt_info;
7713 first_dr_info = dr_info;
7715 if (slp && grouped_load)
7717 group_size = DR_GROUP_SIZE (first_stmt_info);
7718 ref_type = get_group_alias_ptr_type (first_stmt_info);
7720 else
7722 if (grouped_load)
7723 cst_offset
7724 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7725 * vect_get_place_in_interleaving_chain (stmt_info,
7726 first_stmt_info));
7727 group_size = 1;
7728 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7731 stride_base
7732 = fold_build_pointer_plus
7733 (DR_BASE_ADDRESS (first_dr_info->dr),
7734 size_binop (PLUS_EXPR,
7735 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7736 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7737 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7739 /* For a load with loop-invariant (but other than power-of-2)
7740 stride (i.e. not a grouped access) like so:
7742 for (i = 0; i < n; i += stride)
7743 ... = array[i];
7745 we generate a new induction variable and new accesses to
7746 form a new vector (or vectors, depending on ncopies):
7748 for (j = 0; ; j += VF*stride)
7749 tmp1 = array[j];
7750 tmp2 = array[j + stride];
7752 vectemp = {tmp1, tmp2, ...}
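/* As a concrete sketch of the above (assuming a V4SI vectype and
   ncopies == 1), each iteration of the vectorized loop emits
     tmp0 = array[j];
     tmp1 = array[j + stride];
     tmp2 = array[j + 2*stride];
     tmp3 = array[j + 3*stride];
     vectemp = { tmp0, tmp1, tmp2, tmp3 };
   where the final vector is built with a CONSTRUCTOR of the element
   loads.  */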
7755 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7756 build_int_cst (TREE_TYPE (stride_step), vf));
7758 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7760 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7761 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7762 create_iv (stride_base, ivstep, NULL,
7763 loop, &incr_gsi, insert_after,
7764 &offvar, NULL);
7765 incr = gsi_stmt (incr_gsi);
7766 loop_vinfo->add_stmt (incr);
7768 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7770 prev_stmt_info = NULL;
7771 running_off = offvar;
7772 alias_off = build_int_cst (ref_type, 0);
7773 int nloads = const_nunits;
7774 int lnel = 1;
7775 tree ltype = TREE_TYPE (vectype);
7776 tree lvectype = vectype;
7777 auto_vec<tree> dr_chain;
7778 if (memory_access_type == VMAT_STRIDED_SLP)
7780 if (group_size < const_nunits)
7782 /* First check if vec_init optab supports construction from
7783 vector elts directly. */
7784 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7785 machine_mode vmode;
7786 if (mode_for_vector (elmode, group_size).exists (&vmode)
7787 && VECTOR_MODE_P (vmode)
7788 && targetm.vector_mode_supported_p (vmode)
7789 && (convert_optab_handler (vec_init_optab,
7790 TYPE_MODE (vectype), vmode)
7791 != CODE_FOR_nothing))
7793 nloads = const_nunits / group_size;
7794 lnel = group_size;
7795 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7797 else
7799 /* Otherwise avoid emitting a constructor of vector elements
7800 by performing the loads using an integer type of the same
7801 size, constructing a vector of those and then
7802 re-interpreting it as the original vector type.
7803 This avoids a huge runtime penalty due to the general
7804 inability to perform store forwarding from smaller stores
7805 to a larger load. */
7806 unsigned lsize
7807 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7808 unsigned int lnunits = const_nunits / group_size;
7809 /* If we can't construct such a vector fall back to
7810 element loads of the original vector type. */
7811 if (int_mode_for_size (lsize, 0).exists (&elmode)
7812 && mode_for_vector (elmode, lnunits).exists (&vmode)
7813 && VECTOR_MODE_P (vmode)
7814 && targetm.vector_mode_supported_p (vmode)
7815 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7816 != CODE_FOR_nothing))
7818 nloads = lnunits;
7819 lnel = group_size;
7820 ltype = build_nonstandard_integer_type (lsize, 1);
7821 lvectype = build_vector_type (ltype, nloads);
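/* A sketch of this fallback, assuming a V4SI vectype and a group size
   of 2: lsize is 64, so each group of two ints is loaded as a single
   64-bit integer, the two integers are collected into a two-element
   integer vector, and the result is VIEW_CONVERT_EXPRed back to V4SI,
   avoiding the store-forwarding penalty described above.  */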
7825 else
7827 nloads = 1;
7828 lnel = const_nunits;
7829 ltype = vectype;
7831 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7833 /* Load vector(1) scalar_type if the vectype has just one element. */
7834 else if (nloads == 1)
7835 ltype = vectype;
7837 if (slp)
7839 /* For SLP permutation support we need to load the whole group,
7840 not only the number of vector stmts the permutation result
7841 fits in. */
7842 if (slp_perm)
7844 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7845 variable VF. */
7846 unsigned int const_vf = vf.to_constant ();
7847 ncopies = CEIL (group_size * const_vf, const_nunits);
7848 dr_chain.create (ncopies);
7850 else
7851 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7853 unsigned int group_el = 0;
7854 unsigned HOST_WIDE_INT
7855 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7856 for (j = 0; j < ncopies; j++)
7858 if (nloads > 1)
7859 vec_alloc (v, nloads);
7860 stmt_vec_info new_stmt_info = NULL;
7861 for (i = 0; i < nloads; i++)
7863 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7864 group_el * elsz + cst_offset);
7865 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7866 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7867 gassign *new_stmt
7868 = gimple_build_assign (make_ssa_name (ltype), data_ref);
7869 new_stmt_info
7870 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7871 if (nloads > 1)
7872 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7873 gimple_assign_lhs (new_stmt));
7875 group_el += lnel;
7876 if (! slp
7877 || group_el == group_size)
7879 tree newoff = copy_ssa_name (running_off);
7880 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7881 running_off, stride_step);
7882 vect_finish_stmt_generation (stmt_info, incr, gsi);
7884 running_off = newoff;
7885 group_el = 0;
7888 if (nloads > 1)
7890 tree vec_inv = build_constructor (lvectype, v);
7891 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
7892 new_stmt_info = vinfo->lookup_def (new_temp);
7893 if (lvectype != vectype)
7895 gassign *new_stmt
7896 = gimple_build_assign (make_ssa_name (vectype),
7897 VIEW_CONVERT_EXPR,
7898 build1 (VIEW_CONVERT_EXPR,
7899 vectype, new_temp));
7900 new_stmt_info
7901 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7905 if (slp)
7907 if (slp_perm)
7908 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
7909 else
7910 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7912 else
7914 if (j == 0)
7915 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7916 else
7917 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7918 prev_stmt_info = new_stmt_info;
7921 if (slp_perm)
7923 unsigned n_perms;
7924 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7925 slp_node_instance, false, &n_perms);
7927 return true;
7930 if (memory_access_type == VMAT_GATHER_SCATTER
7931 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7932 grouped_load = false;
7934 if (grouped_load)
7936 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7937 group_size = DR_GROUP_SIZE (first_stmt_info);
7938 /* For SLP vectorization we directly vectorize a subchain
7939 without permutation. */
7940 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7941 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7942 /* For BB vectorization always use the first stmt to base
7943 the data ref pointer on. */
7944 if (bb_vinfo)
7945 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7947 /* Check if the chain of loads is already vectorized. */
7948 if (STMT_VINFO_VEC_STMT (first_stmt_info)
7949 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7950 ??? But we can only do so if there is exactly one,
7951 as we have no way to get at the rest.  Leave the CSE
7952 opportunity alone.
7953 ??? With the group load eventually participating
7954 in multiple different permutations (having multiple
7955 slp nodes which refer to the same group) the CSE
7956 would even produce wrong code.  See PR56270. */
7957 && !slp)
7959 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7960 return true;
7962 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7963 group_gap_adj = 0;
7965 /* VEC_NUM is the number of vect stmts to be created for this group. */
7966 if (slp)
7968 grouped_load = false;
7969 /* If an SLP permutation is from N elements to N elements,
7970 and if one vector holds a whole number of N, we can load
7971 the inputs to the permutation in the same way as an
7972 unpermuted sequence. In other cases we need to load the
7973 whole group, not only the number of vector stmts the
7974 permutation result fits in. */
7975 if (slp_perm
7976 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
7977 || !multiple_p (nunits, group_size)))
7979 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
7980 variable VF; see vect_transform_slp_perm_load. */
7981 unsigned int const_vf = vf.to_constant ();
7982 unsigned int const_nunits = nunits.to_constant ();
7983 vec_num = CEIL (group_size * const_vf, const_nunits);
7984 group_gap_adj = vf * group_size - nunits * vec_num;
7986 else
7988 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7989 group_gap_adj
7990 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7993 else
7994 vec_num = group_size;
7996 ref_type = get_group_alias_ptr_type (first_stmt_info);
7998 else
8000 first_stmt_info = stmt_info;
8001 first_dr_info = dr_info;
8002 group_size = vec_num = 1;
8003 group_gap_adj = 0;
8004 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8007 alignment_support_scheme
8008 = vect_supportable_dr_alignment (first_dr_info, false);
8009 gcc_assert (alignment_support_scheme);
8010 vec_loop_masks *loop_masks
8011 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8012 ? &LOOP_VINFO_MASKS (loop_vinfo)
8013 : NULL);
8014 /* Targets with load-lane instructions must not require explicit
8015 realignment. vect_supportable_dr_alignment always returns either
8016 dr_aligned or dr_unaligned_supported for masked operations. */
8017 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8018 && !mask
8019 && !loop_masks)
8020 || alignment_support_scheme == dr_aligned
8021 || alignment_support_scheme == dr_unaligned_supported);
8023 /* In case the vectorization factor (VF) is bigger than the number
8024 of elements that we can fit in a vectype (nunits), we have to generate
8025 more than one vector stmt, i.e., we need to "unroll" the
8026 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8027 from one copy of the vector stmt to the next, in the field
8028 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8029 stages to find the correct vector defs to be used when vectorizing
8030 stmts that use the defs of the current stmt. The example below
8031 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8032 need to create 4 vectorized stmts):
8034 before vectorization:
8035 RELATED_STMT VEC_STMT
8036 S1: x = memref - -
8037 S2: z = x + 1 - -
8039 step 1: vectorize stmt S1:
8040 We first create the vector stmt VS1_0, and, as usual, record a
8041 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8042 Next, we create the vector stmt VS1_1, and record a pointer to
8043 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8044 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8045 stmts and pointers:
8046 RELATED_STMT VEC_STMT
8047 VS1_0: vx0 = memref0 VS1_1 -
8048 VS1_1: vx1 = memref1 VS1_2 -
8049 VS1_2: vx2 = memref2 VS1_3 -
8050 VS1_3: vx3 = memref3 - -
8051 S1: x = load - VS1_0
8052 S2: z = x + 1 - -
8054 See the documentation of vect_get_vec_def_for_stmt_copy for how the
8055 information we recorded in RELATED_STMT field is used to vectorize
8056 stmt S2. */
8058 /* In case of interleaving (non-unit grouped access):
8060 S1: x2 = &base + 2
8061 S2: x0 = &base
8062 S3: x1 = &base + 1
8063 S4: x3 = &base + 3
8065 Vectorized loads are created in the order of memory accesses
8066 starting from the access of the first stmt of the chain:
8068 VS1: vx0 = &base
8069 VS2: vx1 = &base + vec_size*1
8070 VS3: vx3 = &base + vec_size*2
8071 VS4: vx4 = &base + vec_size*3
8073 Then permutation statements are generated:
8075 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8076 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8079 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8080 (the order of the data-refs in the output of vect_permute_load_chain
8081 corresponds to the order of scalar stmts in the interleaving chain - see
8082 the documentation of vect_permute_load_chain()).
8083 The generation of permutation stmts and recording them in
8084 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8086 In case of both multiple types and interleaving, the vector loads and
8087 permutation stmts above are created for every copy. The result vector
8088 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8089 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8091 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8092 on a target that supports unaligned accesses (dr_unaligned_supported)
8093 we generate the following code:
8094 p = initial_addr;
8095 indx = 0;
8096 loop {
8097 p = p + indx * vectype_size;
8098 vec_dest = *(p);
8099 indx = indx + 1;
8102 Otherwise, the data reference is potentially unaligned on a target that
8103 does not support unaligned accesses (dr_explicit_realign_optimized) -
8104 then generate the following code, in which the data in each iteration is
8105 obtained by two vector loads, one from the previous iteration, and one
8106 from the current iteration:
8107 p1 = initial_addr;
8108 msq_init = *(floor(p1))
8109 p2 = initial_addr + VS - 1;
8110 realignment_token = call target_builtin;
8111 indx = 0;
8112 loop {
8113 p2 = p2 + indx * vectype_size
8114 lsq = *(floor(p2))
8115 vec_dest = realign_load (msq, lsq, realignment_token)
8116 indx = indx + 1;
8117 msq = lsq;
8118 } */
8120 /* If the misalignment remains the same throughout the execution of the
8121 loop, we can create the init_addr and permutation mask at the loop
8122 preheader. Otherwise, it needs to be created inside the loop.
8123 This can only occur when vectorizing memory accesses in the inner-loop
8124 nested within an outer-loop that is being vectorized. */
8126 if (nested_in_vect_loop
8127 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
8128 GET_MODE_SIZE (TYPE_MODE (vectype))))
8130 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8131 compute_in_loop = true;
8134 if ((alignment_support_scheme == dr_explicit_realign_optimized
8135 || alignment_support_scheme == dr_explicit_realign)
8136 && !compute_in_loop)
8138 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
8139 alignment_support_scheme, NULL_TREE,
8140 &at_loop);
8141 if (alignment_support_scheme == dr_explicit_realign_optimized)
8143 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8144 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8145 size_one_node);
8148 else
8149 at_loop = loop;
8151 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8152 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
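/* For example (illustrative): with a four-element vectype OFFSET is -3,
   so the vector load starts three elements before the current scalar
   access and the loaded vector is reversed afterwards using
   perm_mask_for_reverse.  */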
8154 tree bump;
8155 tree vec_offset = NULL_TREE;
8156 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8158 aggr_type = NULL_TREE;
8159 bump = NULL_TREE;
8161 else if (memory_access_type == VMAT_GATHER_SCATTER)
8163 aggr_type = elem_type;
8164 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8165 &bump, &vec_offset);
8167 else
8169 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8170 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8171 else
8172 aggr_type = vectype;
8173 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8174 memory_access_type);
8177 tree vec_mask = NULL_TREE;
8178 prev_stmt_info = NULL;
8179 poly_uint64 group_elt = 0;
8180 for (j = 0; j < ncopies; j++)
8182 stmt_vec_info new_stmt_info = NULL;
8183 /* 1. Create the vector or array pointer update chain. */
8184 if (j == 0)
8186 bool simd_lane_access_p
8187 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8188 if (simd_lane_access_p
8189 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8190 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8191 && integer_zerop (DR_OFFSET (first_dr_info->dr))
8192 && integer_zerop (DR_INIT (first_dr_info->dr))
8193 && alias_sets_conflict_p (get_alias_set (aggr_type),
8194 get_alias_set (TREE_TYPE (ref_type)))
8195 && (alignment_support_scheme == dr_aligned
8196 || alignment_support_scheme == dr_unaligned_supported))
8198 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8199 dataref_offset = build_int_cst (ref_type, 0);
8201 else if (first_stmt_info_for_drptr
8202 && first_stmt_info != first_stmt_info_for_drptr)
8204 dataref_ptr
8205 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8206 aggr_type, at_loop, offset, &dummy,
8207 gsi, &ptr_incr, simd_lane_access_p,
8208 byte_offset, bump);
8209 /* Adjust the pointer by the difference to first_stmt. */
8210 data_reference_p ptrdr
8211 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
8212 tree diff
8213 = fold_convert (sizetype,
8214 size_binop (MINUS_EXPR,
8215 DR_INIT (first_dr_info->dr),
8216 DR_INIT (ptrdr)));
8217 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8218 stmt_info, diff);
8220 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8221 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8222 &dataref_ptr, &vec_offset);
8223 else
8224 dataref_ptr
8225 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
8226 offset, &dummy, gsi, &ptr_incr,
8227 simd_lane_access_p,
8228 byte_offset, bump);
8229 if (mask)
8230 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8231 mask_vectype);
8233 else
8235 if (dataref_offset)
8236 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8237 bump);
8238 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8239 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8240 else
8241 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8242 stmt_info, bump);
8243 if (mask)
8244 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8247 if (grouped_load || slp_perm)
8248 dr_chain.create (vec_num);
8250 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8252 tree vec_array;
8254 vec_array = create_vector_array (vectype, vec_num);
8256 tree final_mask = NULL_TREE;
8257 if (loop_masks)
8258 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8259 vectype, j);
8260 if (vec_mask)
8261 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8262 vec_mask, gsi);
8264 gcall *call;
8265 if (final_mask)
8267 /* Emit:
8268 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8269 VEC_MASK). */
8270 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8271 tree alias_ptr = build_int_cst (ref_type, align);
8272 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8273 dataref_ptr, alias_ptr,
8274 final_mask);
8276 else
8278 /* Emit:
8279 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8280 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8281 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8283 gimple_call_set_lhs (call, vec_array);
8284 gimple_call_set_nothrow (call, true);
8285 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8287 /* Extract each vector into an SSA_NAME. */
8288 for (i = 0; i < vec_num; i++)
8290 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
8291 vec_array, i);
8292 dr_chain.quick_push (new_temp);
8295 /* Record the mapping between SSA_NAMEs and statements. */
8296 vect_record_grouped_load_vectors (stmt_info, dr_chain);
8298 /* Record that VEC_ARRAY is now dead. */
8299 vect_clobber_variable (stmt_info, gsi, vec_array);
8301 else
8303 for (i = 0; i < vec_num; i++)
8305 tree final_mask = NULL_TREE;
8306 if (loop_masks
8307 && memory_access_type != VMAT_INVARIANT)
8308 final_mask = vect_get_loop_mask (gsi, loop_masks,
8309 vec_num * ncopies,
8310 vectype, vec_num * j + i);
8311 if (vec_mask)
8312 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8313 vec_mask, gsi);
8315 if (i > 0)
8316 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8317 stmt_info, bump);
8319 /* 2. Create the vector-load in the loop. */
8320 gimple *new_stmt = NULL;
8321 switch (alignment_support_scheme)
8323 case dr_aligned:
8324 case dr_unaligned_supported:
8326 unsigned int misalign;
8327 unsigned HOST_WIDE_INT align;
8329 if (memory_access_type == VMAT_GATHER_SCATTER)
8331 tree scale = size_int (gs_info.scale);
8332 gcall *call;
8333 if (loop_masks)
8334 call = gimple_build_call_internal
8335 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8336 vec_offset, scale, final_mask);
8337 else
8338 call = gimple_build_call_internal
8339 (IFN_GATHER_LOAD, 3, dataref_ptr,
8340 vec_offset, scale);
8341 gimple_call_set_nothrow (call, true);
8342 new_stmt = call;
8343 data_ref = NULL_TREE;
8344 break;
8347 align =
8348 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8349 if (alignment_support_scheme == dr_aligned)
8351 gcc_assert (aligned_access_p (first_dr_info));
8352 misalign = 0;
8354 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8356 align = dr_alignment
8357 (vect_dr_behavior (first_dr_info));
8358 misalign = 0;
8360 else
8361 misalign = DR_MISALIGNMENT (first_dr_info);
8362 if (dataref_offset == NULL_TREE
8363 && TREE_CODE (dataref_ptr) == SSA_NAME)
8364 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8365 align, misalign);
8367 if (final_mask)
8369 align = least_bit_hwi (misalign | align);
8370 tree ptr = build_int_cst (ref_type, align);
8371 gcall *call
8372 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8373 dataref_ptr, ptr,
8374 final_mask);
8375 gimple_call_set_nothrow (call, true);
8376 new_stmt = call;
8377 data_ref = NULL_TREE;
8379 else
8381 data_ref
8382 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8383 dataref_offset
8384 ? dataref_offset
8385 : build_int_cst (ref_type, 0));
8386 if (alignment_support_scheme == dr_aligned)
8388 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8389 TREE_TYPE (data_ref)
8390 = build_aligned_type (TREE_TYPE (data_ref),
8391 align * BITS_PER_UNIT);
8392 else
8393 TREE_TYPE (data_ref)
8394 = build_aligned_type (TREE_TYPE (data_ref),
8395 TYPE_ALIGN (elem_type));
8397 break;
8399 case dr_explicit_realign:
8401 tree ptr, bump;
8403 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8405 if (compute_in_loop)
8406 msq = vect_setup_realignment (first_stmt_info, gsi,
8407 &realignment_token,
8408 dr_explicit_realign,
8409 dataref_ptr, NULL);
8411 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8412 ptr = copy_ssa_name (dataref_ptr);
8413 else
8414 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8415 // For explicit realign the target alignment should be
8416 // known at compile time.
8417 unsigned HOST_WIDE_INT align =
8418 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8419 new_stmt = gimple_build_assign
8420 (ptr, BIT_AND_EXPR, dataref_ptr,
8421 build_int_cst
8422 (TREE_TYPE (dataref_ptr),
8423 -(HOST_WIDE_INT) align));
8424 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8425 data_ref
8426 = build2 (MEM_REF, vectype, ptr,
8427 build_int_cst (ref_type, 0));
8428 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8429 vec_dest = vect_create_destination_var (scalar_dest,
8430 vectype);
8431 new_stmt = gimple_build_assign (vec_dest, data_ref);
8432 new_temp = make_ssa_name (vec_dest, new_stmt);
8433 gimple_assign_set_lhs (new_stmt, new_temp);
8434 gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
8435 gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
8436 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8437 msq = new_temp;
8439 bump = size_binop (MULT_EXPR, vs,
8440 TYPE_SIZE_UNIT (elem_type));
8441 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8442 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
8443 stmt_info, bump);
8444 new_stmt = gimple_build_assign
8445 (NULL_TREE, BIT_AND_EXPR, ptr,
8446 build_int_cst
8447 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8448 ptr = copy_ssa_name (ptr, new_stmt);
8449 gimple_assign_set_lhs (new_stmt, ptr);
8450 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8451 data_ref
8452 = build2 (MEM_REF, vectype, ptr,
8453 build_int_cst (ref_type, 0));
8454 break;
8456 case dr_explicit_realign_optimized:
8458 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8459 new_temp = copy_ssa_name (dataref_ptr);
8460 else
8461 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8462 // We should only be doing this if we know the target
8463 // alignment at compile time.
8464 unsigned HOST_WIDE_INT align =
8465 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8466 new_stmt = gimple_build_assign
8467 (new_temp, BIT_AND_EXPR, dataref_ptr,
8468 build_int_cst (TREE_TYPE (dataref_ptr),
8469 -(HOST_WIDE_INT) align));
8470 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8471 data_ref
8472 = build2 (MEM_REF, vectype, new_temp,
8473 build_int_cst (ref_type, 0));
8474 break;
8476 default:
8477 gcc_unreachable ();
8479 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8480 /* DATA_REF is null if we've already built the statement. */
8481 if (data_ref)
8483 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8484 new_stmt = gimple_build_assign (vec_dest, data_ref);
8486 new_temp = make_ssa_name (vec_dest, new_stmt);
8487 gimple_set_lhs (new_stmt, new_temp);
8488 new_stmt_info
8489 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8491 /* 3. Handle explicit realignment if necessary/supported.
8492 Create in loop:
8493 vec_dest = realign_load (msq, lsq, realignment_token) */
8494 if (alignment_support_scheme == dr_explicit_realign_optimized
8495 || alignment_support_scheme == dr_explicit_realign)
8497 lsq = gimple_assign_lhs (new_stmt);
8498 if (!realignment_token)
8499 realignment_token = dataref_ptr;
8500 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8501 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8502 msq, lsq, realignment_token);
8503 new_temp = make_ssa_name (vec_dest, new_stmt);
8504 gimple_assign_set_lhs (new_stmt, new_temp);
8505 new_stmt_info
8506 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8508 if (alignment_support_scheme == dr_explicit_realign_optimized)
8510 gcc_assert (phi);
8511 if (i == vec_num - 1 && j == ncopies - 1)
8512 add_phi_arg (phi, lsq,
8513 loop_latch_edge (containing_loop),
8514 UNKNOWN_LOCATION);
8515 msq = lsq;
8519 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8521 tree perm_mask = perm_mask_for_reverse (vectype);
8522 new_temp = permute_vec_elements (new_temp, new_temp,
8523 perm_mask, stmt_info, gsi);
8524 new_stmt_info = vinfo->lookup_def (new_temp);
8527 /* Collect vector loads and later create their permutation in
8528 vect_transform_grouped_load (). */
8529 if (grouped_load || slp_perm)
8530 dr_chain.quick_push (new_temp);
8532 /* Store vector loads in the corresponding SLP_NODE. */
8533 if (slp && !slp_perm)
8534 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8536 /* With SLP permutation we load the gaps as well; without it
8537 we need to skip the gaps after we manage to fully load
8538 all elements.  group_gap_adj is DR_GROUP_SIZE here. */
8539 group_elt += nunits;
8540 if (maybe_ne (group_gap_adj, 0U)
8541 && !slp_perm
8542 && known_eq (group_elt, group_size - group_gap_adj))
8544 poly_wide_int bump_val
8545 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8546 * group_gap_adj);
8547 tree bump = wide_int_to_tree (sizetype, bump_val);
8548 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8549 stmt_info, bump);
8550 group_elt = 0;
8553 /* Bump the vector pointer to account for a gap or for excess
8554 elements loaded for a permuted SLP load. */
8555 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8557 poly_wide_int bump_val
8558 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8559 * group_gap_adj);
8560 tree bump = wide_int_to_tree (sizetype, bump_val);
8561 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8562 stmt_info, bump);
8566 if (slp && !slp_perm)
8567 continue;
8569 if (slp_perm)
8571 unsigned n_perms;
8572 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8573 slp_node_instance, false,
8574 &n_perms))
8576 dr_chain.release ();
8577 return false;
8580 else
8582 if (grouped_load)
8584 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8585 vect_transform_grouped_load (stmt_info, dr_chain,
8586 group_size, gsi);
8587 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8589 else
8591 if (j == 0)
8592 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8593 else
8594 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8595 prev_stmt_info = new_stmt_info;
8598 dr_chain.release ();
8601 return true;
8604 /* Function vect_is_simple_cond.
8606 Input:
8607 LOOP - the loop that is being vectorized.
8608 COND - Condition that is checked for simple use.
8610 Output:
8611 *COMP_VECTYPE - the vector type for the comparison.
8612 *DTS - The def types for the arguments of the comparison.
8614 Returns whether a COND can be vectorized.  Checks whether
8615 condition operands are supportable using vect_is_simple_use.
8617 static bool
8618 vect_is_simple_cond (tree cond, vec_info *vinfo,
8619 tree *comp_vectype, enum vect_def_type *dts,
8620 tree vectype)
8622 tree lhs, rhs;
8623 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8625 /* Mask case. */
8626 if (TREE_CODE (cond) == SSA_NAME
8627 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8629 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8630 || !*comp_vectype
8631 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8632 return false;
8633 return true;
8636 if (!COMPARISON_CLASS_P (cond))
8637 return false;
8639 lhs = TREE_OPERAND (cond, 0);
8640 rhs = TREE_OPERAND (cond, 1);
8642 if (TREE_CODE (lhs) == SSA_NAME)
8644 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8645 return false;
8647 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8648 || TREE_CODE (lhs) == FIXED_CST)
8649 dts[0] = vect_constant_def;
8650 else
8651 return false;
8653 if (TREE_CODE (rhs) == SSA_NAME)
8655 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8656 return false;
8658 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8659 || TREE_CODE (rhs) == FIXED_CST)
8660 dts[1] = vect_constant_def;
8661 else
8662 return false;
8664 if (vectype1 && vectype2
8665 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8666 TYPE_VECTOR_SUBPARTS (vectype2)))
8667 return false;
8669 *comp_vectype = vectype1 ? vectype1 : vectype2;
8670 /* Invariant comparison. */
8671 if (! *comp_vectype && vectype)
8673 tree scalar_type = TREE_TYPE (lhs);
8674 /* If we can widen the comparison to match vectype do so. */
8675 if (INTEGRAL_TYPE_P (scalar_type)
8676 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8677 TYPE_SIZE (TREE_TYPE (vectype))))
8678 scalar_type = build_nonstandard_integer_type
8679 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8680 TYPE_UNSIGNED (scalar_type));
8681 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8684 return true;
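/* For illustration (assuming a V4SI result vectype): if COND compares
   two invariant 'short' operands, the comparison is widened to a
   32-bit integer type of the same signedness so that *COMP_VECTYPE
   has as many elements as the result vector.  */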
8687 /* vectorizable_condition.
8689 Check if STMT_INFO is a conditional modify expression that can be vectorized.
8690 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8691 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8692 at GSI.
8694 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
8696 Return true if STMT_INFO is vectorizable in this way. */
8698 bool
8699 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8700 stmt_vec_info *vec_stmt, bool for_reduction,
8701 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
8703 vec_info *vinfo = stmt_info->vinfo;
8704 tree scalar_dest = NULL_TREE;
8705 tree vec_dest = NULL_TREE;
8706 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8707 tree then_clause, else_clause;
8708 tree comp_vectype = NULL_TREE;
8709 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8710 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8711 tree vec_compare;
8712 tree new_temp;
8713 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8714 enum vect_def_type dts[4]
8715 = {vect_unknown_def_type, vect_unknown_def_type,
8716 vect_unknown_def_type, vect_unknown_def_type};
8717 int ndts = 4;
8718 int ncopies;
8719 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8720 stmt_vec_info prev_stmt_info = NULL;
8721 int i, j;
8722 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8723 vec<tree> vec_oprnds0 = vNULL;
8724 vec<tree> vec_oprnds1 = vNULL;
8725 vec<tree> vec_oprnds2 = vNULL;
8726 vec<tree> vec_oprnds3 = vNULL;
8727 tree vec_cmp_type;
8728 bool masked = false;
8730 if (for_reduction && STMT_SLP_TYPE (stmt_info))
8731 return false;
8733 vect_reduction_type reduction_type
8734 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8735 if (reduction_type == TREE_CODE_REDUCTION)
8737 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8738 return false;
8740 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8741 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8742 && for_reduction))
8743 return false;
8745 /* FORNOW: not yet supported. */
8746 if (STMT_VINFO_LIVE_P (stmt_info))
8748 if (dump_enabled_p ())
8749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8750 "value used after loop.\n");
8751 return false;
8755 /* Is this a vectorizable conditional operation? */
8756 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
8757 if (!stmt)
8758 return false;
8760 code = gimple_assign_rhs_code (stmt);
8762 if (code != COND_EXPR)
8763 return false;
8765 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8766 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8768 if (slp_node)
8769 ncopies = 1;
8770 else
8771 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8773 gcc_assert (ncopies >= 1);
8774 if (for_reduction && ncopies > 1)
8775 return false; /* FORNOW */
8777 cond_expr = gimple_assign_rhs1 (stmt);
8778 then_clause = gimple_assign_rhs2 (stmt);
8779 else_clause = gimple_assign_rhs3 (stmt);
8781 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8782 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8783 || !comp_vectype)
8784 return false;
8786 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8787 return false;
8788 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8789 return false;
8791 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8792 return false;
8794 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8795 return false;
8797 masked = !COMPARISON_CLASS_P (cond_expr);
8798 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8800 if (vec_cmp_type == NULL_TREE)
8801 return false;
8803 cond_code = TREE_CODE (cond_expr);
8804 if (!masked)
8806 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8807 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8810 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8812 /* Boolean values may have another representation in vectors
8813 and therefore we prefer bit operations over comparison for
8814 them (which also works for scalar masks). We store opcodes
8815 to use in bitop1 and bitop2.  The statement is vectorized as
8816 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8817 depending on bitop1 and bitop2 arity. */
8818 switch (cond_code)
8820 case GT_EXPR:
8821 bitop1 = BIT_NOT_EXPR;
8822 bitop2 = BIT_AND_EXPR;
8823 break;
8824 case GE_EXPR:
8825 bitop1 = BIT_NOT_EXPR;
8826 bitop2 = BIT_IOR_EXPR;
8827 break;
8828 case LT_EXPR:
8829 bitop1 = BIT_NOT_EXPR;
8830 bitop2 = BIT_AND_EXPR;
8831 std::swap (cond_expr0, cond_expr1);
8832 break;
8833 case LE_EXPR:
8834 bitop1 = BIT_NOT_EXPR;
8835 bitop2 = BIT_IOR_EXPR;
8836 std::swap (cond_expr0, cond_expr1);
8837 break;
8838 case NE_EXPR:
8839 bitop1 = BIT_XOR_EXPR;
8840 break;
8841 case EQ_EXPR:
8842 bitop1 = BIT_XOR_EXPR;
8843 bitop2 = BIT_NOT_EXPR;
8844 break;
8845 default:
8846 return false;
8848 cond_code = SSA_NAME;
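/* A worked example of the mapping above for boolean operands a and b
   (each 0 or 1):  a > b  becomes  a & ~b,  a >= b  becomes  a | ~b,
   a < b  (after swapping the operands) becomes  b & ~a,  a != b
   becomes  a ^ b,  and  a == b  becomes  ~(a ^ b), where the trailing
   BIT_NOT is later folded away by swapping the then/else clauses of
   the VEC_COND_EXPR.  */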
8851 if (!vec_stmt)
8853 if (bitop1 != NOP_EXPR)
8855 machine_mode mode = TYPE_MODE (comp_vectype);
8856 optab optab;
8858 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8859 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8860 return false;
8862 if (bitop2 != NOP_EXPR)
8864 optab = optab_for_tree_code (bitop2, comp_vectype,
8865 optab_default);
8866 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8867 return false;
8870 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8871 cond_code))
8873 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8874 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8875 cost_vec);
8876 return true;
8878 return false;
8881 /* Transform. */
8883 if (!slp_node)
8885 vec_oprnds0.create (1);
8886 vec_oprnds1.create (1);
8887 vec_oprnds2.create (1);
8888 vec_oprnds3.create (1);
8891 /* Handle def. */
8892 scalar_dest = gimple_assign_lhs (stmt);
8893 if (reduction_type != EXTRACT_LAST_REDUCTION)
8894 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8896 /* Handle cond expr. */
8897 for (j = 0; j < ncopies; j++)
8899 stmt_vec_info new_stmt_info = NULL;
8900 if (j == 0)
8902 if (slp_node)
8904 auto_vec<tree, 4> ops;
8905 auto_vec<vec<tree>, 4> vec_defs;
8907 if (masked)
8908 ops.safe_push (cond_expr);
8909 else
8911 ops.safe_push (cond_expr0);
8912 ops.safe_push (cond_expr1);
8914 ops.safe_push (then_clause);
8915 ops.safe_push (else_clause);
8916 vect_get_slp_defs (ops, slp_node, &vec_defs);
8917 vec_oprnds3 = vec_defs.pop ();
8918 vec_oprnds2 = vec_defs.pop ();
8919 if (!masked)
8920 vec_oprnds1 = vec_defs.pop ();
8921 vec_oprnds0 = vec_defs.pop ();
8923 else
8925 if (masked)
8927 vec_cond_lhs
8928 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
8929 comp_vectype);
8931 else
8933 vec_cond_lhs
8934 = vect_get_vec_def_for_operand (cond_expr0,
8935 stmt_info, comp_vectype);
8936 vec_cond_rhs
8937 = vect_get_vec_def_for_operand (cond_expr1,
8938 stmt_info, comp_vectype);
8940 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8941 stmt_info);
8942 if (reduction_type != EXTRACT_LAST_REDUCTION)
8943 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8944 stmt_info);
8947 else
8949 vec_cond_lhs
8950 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
8951 if (!masked)
8952 vec_cond_rhs
8953 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
8955 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
8956 vec_oprnds2.pop ());
8957 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
8958 vec_oprnds3.pop ());
8961 if (!slp_node)
8963 vec_oprnds0.quick_push (vec_cond_lhs);
8964 if (!masked)
8965 vec_oprnds1.quick_push (vec_cond_rhs);
8966 vec_oprnds2.quick_push (vec_then_clause);
8967 vec_oprnds3.quick_push (vec_else_clause);
8970 /* Arguments are ready. Create the new vector stmt. */
8971 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8973 vec_then_clause = vec_oprnds2[i];
8974 vec_else_clause = vec_oprnds3[i];
8976 if (masked)
8977 vec_compare = vec_cond_lhs;
8978 else
8980 vec_cond_rhs = vec_oprnds1[i];
8981 if (bitop1 == NOP_EXPR)
8982 vec_compare = build2 (cond_code, vec_cmp_type,
8983 vec_cond_lhs, vec_cond_rhs);
8984 else
8986 new_temp = make_ssa_name (vec_cmp_type);
8987 gassign *new_stmt;
8988 if (bitop1 == BIT_NOT_EXPR)
8989 new_stmt = gimple_build_assign (new_temp, bitop1,
8990 vec_cond_rhs);
8991 else
8992 new_stmt
8993 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8994 vec_cond_rhs);
8995 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8996 if (bitop2 == NOP_EXPR)
8997 vec_compare = new_temp;
8998 else if (bitop2 == BIT_NOT_EXPR)
9000 /* Instead of doing ~x ? y : z do x ? z : y. */
9001 vec_compare = new_temp;
9002 std::swap (vec_then_clause, vec_else_clause);
9004 else
9006 vec_compare = make_ssa_name (vec_cmp_type);
9007 new_stmt
9008 = gimple_build_assign (vec_compare, bitop2,
9009 vec_cond_lhs, new_temp);
9010 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9014 if (reduction_type == EXTRACT_LAST_REDUCTION)
9016 if (!is_gimple_val (vec_compare))
9018 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9019 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9020 vec_compare);
9021 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9022 vec_compare = vec_compare_name;
9024 gcall *new_stmt = gimple_build_call_internal
9025 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9026 vec_then_clause);
9027 gimple_call_set_lhs (new_stmt, scalar_dest);
9028 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9029 if (stmt_info->stmt == gsi_stmt (*gsi))
9030 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
9031 else
9033 /* In this case we're moving the definition to later in the
9034 block. That doesn't matter because the only uses of the
9035 lhs are in phi statements. */
9036 gimple_stmt_iterator old_gsi
9037 = gsi_for_stmt (stmt_info->stmt);
9038 gsi_remove (&old_gsi, true);
9039 new_stmt_info
9040 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9043 else
9045 new_temp = make_ssa_name (vec_dest);
9046 gassign *new_stmt
9047 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9048 vec_then_clause, vec_else_clause);
9049 new_stmt_info
9050 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9052 if (slp_node)
9053 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9056 if (slp_node)
9057 continue;
9059 if (j == 0)
9060 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9061 else
9062 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9064 prev_stmt_info = new_stmt_info;
9067 vec_oprnds0.release ();
9068 vec_oprnds1.release ();
9069 vec_oprnds2.release ();
9070 vec_oprnds3.release ();
9072 return true;
9075 /* vectorizable_comparison.
9077 Check if STMT_INFO is a comparison expression that can be vectorized.
9078 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9079 comparison, put it in VEC_STMT, and insert it at GSI.
9081 Return true if STMT_INFO is vectorizable in this way. */
9083 static bool
9084 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9085 stmt_vec_info *vec_stmt,
9086 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9088 vec_info *vinfo = stmt_info->vinfo;
9089 tree lhs, rhs1, rhs2;
9090 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9091 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9092 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9093 tree new_temp;
9094 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9095 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9096 int ndts = 2;
9097 poly_uint64 nunits;
9098 int ncopies;
9099 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9100 stmt_vec_info prev_stmt_info = NULL;
9101 int i, j;
9102 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9103 vec<tree> vec_oprnds0 = vNULL;
9104 vec<tree> vec_oprnds1 = vNULL;
9105 tree mask_type;
9106 tree mask;
9108 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9109 return false;
9111 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9112 return false;
9114 mask_type = vectype;
9115 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9117 if (slp_node)
9118 ncopies = 1;
9119 else
9120 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9122 gcc_assert (ncopies >= 1);
9123 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9124 return false;
9126 if (STMT_VINFO_LIVE_P (stmt_info))
9128 if (dump_enabled_p ())
9129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9130 "value used after loop.\n");
9131 return false;
9134 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9135 if (!stmt)
9136 return false;
9138 code = gimple_assign_rhs_code (stmt);
9140 if (TREE_CODE_CLASS (code) != tcc_comparison)
9141 return false;
9143 rhs1 = gimple_assign_rhs1 (stmt);
9144 rhs2 = gimple_assign_rhs2 (stmt);
9146 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9147 return false;
9149 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9150 return false;
9152 if (vectype1 && vectype2
9153 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9154 TYPE_VECTOR_SUBPARTS (vectype2)))
9155 return false;
9157 vectype = vectype1 ? vectype1 : vectype2;
9159 /* Invariant comparison. */
9160 if (!vectype)
9162 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9163 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9164 return false;
9166 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9167 return false;
9169 /* Can't compare mask and non-mask types. */
9170 if (vectype1 && vectype2
9171 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9172 return false;
9174 /* Boolean values may have another representation in vectors
9175 and therefore we prefer bit operations over comparison for
9176 them (which also works for scalar masks). We store the opcodes
9177 to use in bitop1 and bitop2. The statement is vectorized as
9178 BITOP2 (rhs1 BITOP1 rhs2) or
9179 rhs1 BITOP2 (BITOP1 rhs2)
9180 depending on bitop1 and bitop2 arity. */
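/* For example (an illustrative sketch for boolean operands): for GT_EXPR
   the scalar "r = a > b" on booleans is equivalent to "a & ~b", so we emit
   tmp = ~b (bitop1 = BIT_NOT_EXPR applied to rhs2) followed by
   r = a & tmp (bitop2 = BIT_AND_EXPR), which is correct both for 0/1
   booleans and for all-ones vector masks.  */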
9181 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9183 if (code == GT_EXPR)
9185 bitop1 = BIT_NOT_EXPR;
9186 bitop2 = BIT_AND_EXPR;
9188 else if (code == GE_EXPR)
9190 bitop1 = BIT_NOT_EXPR;
9191 bitop2 = BIT_IOR_EXPR;
9193 else if (code == LT_EXPR)
9195 bitop1 = BIT_NOT_EXPR;
9196 bitop2 = BIT_AND_EXPR;
9197 std::swap (rhs1, rhs2);
9198 std::swap (dts[0], dts[1]);
9200 else if (code == LE_EXPR)
9202 bitop1 = BIT_NOT_EXPR;
9203 bitop2 = BIT_IOR_EXPR;
9204 std::swap (rhs1, rhs2);
9205 std::swap (dts[0], dts[1]);
9207 else
9209 bitop1 = BIT_XOR_EXPR;
9210 if (code == EQ_EXPR)
9211 bitop2 = BIT_NOT_EXPR;
9215 if (!vec_stmt)
9217 if (bitop1 == NOP_EXPR)
9219 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9220 return false;
9222 else
9224 machine_mode mode = TYPE_MODE (vectype);
9225 optab optab;
9227 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9228 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9229 return false;
9231 if (bitop2 != NOP_EXPR)
9233 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9234 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9235 return false;
9239 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9240 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9241 dts, ndts, slp_node, cost_vec);
9242 return true;
9245 /* Transform. */
9246 if (!slp_node)
9248 vec_oprnds0.create (1);
9249 vec_oprnds1.create (1);
9252 /* Handle def. */
9253 lhs = gimple_assign_lhs (stmt);
9254 mask = vect_create_destination_var (lhs, mask_type);
9256 /* Handle cmp expr. */
9257 for (j = 0; j < ncopies; j++)
9259 stmt_vec_info new_stmt_info = NULL;
9260 if (j == 0)
9262 if (slp_node)
9264 auto_vec<tree, 2> ops;
9265 auto_vec<vec<tree>, 2> vec_defs;
9267 ops.safe_push (rhs1);
9268 ops.safe_push (rhs2);
9269 vect_get_slp_defs (ops, slp_node, &vec_defs);
9270 vec_oprnds1 = vec_defs.pop ();
9271 vec_oprnds0 = vec_defs.pop ();
9273 else
9275 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
9276 vectype);
9277 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
9278 vectype);
9281 else
9283 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
9284 vec_oprnds0.pop ());
9285 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
9286 vec_oprnds1.pop ());
9289 if (!slp_node)
9291 vec_oprnds0.quick_push (vec_rhs1);
9292 vec_oprnds1.quick_push (vec_rhs2);
9295 /* Arguments are ready. Create the new vector stmt. */
9296 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9298 vec_rhs2 = vec_oprnds1[i];
9300 new_temp = make_ssa_name (mask);
9301 if (bitop1 == NOP_EXPR)
9303 gassign *new_stmt = gimple_build_assign (new_temp, code,
9304 vec_rhs1, vec_rhs2);
9305 new_stmt_info
9306 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9308 else
9310 gassign *new_stmt;
9311 if (bitop1 == BIT_NOT_EXPR)
9312 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9313 else
9314 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9315 vec_rhs2);
9316 new_stmt_info
9317 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9318 if (bitop2 != NOP_EXPR)
9320 tree res = make_ssa_name (mask);
9321 if (bitop2 == BIT_NOT_EXPR)
9322 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9323 else
9324 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9325 new_temp);
9326 new_stmt_info
9327 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9330 if (slp_node)
9331 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9334 if (slp_node)
9335 continue;
9337 if (j == 0)
9338 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9339 else
9340 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9342 prev_stmt_info = new_stmt_info;
9345 vec_oprnds0.release ();
9346 vec_oprnds1.release ();
9348 return true;
9351 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9352 can handle all live statements in the node. Otherwise return true
9353 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9354 GSI and VEC_STMT are as for vectorizable_live_operation. */
9356 static bool
9357 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9358 slp_tree slp_node, stmt_vec_info *vec_stmt,
9359 stmt_vector_for_cost *cost_vec)
9361 if (slp_node)
9363 stmt_vec_info slp_stmt_info;
9364 unsigned int i;
9365 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
9367 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9368 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
9369 vec_stmt, cost_vec))
9370 return false;
9373 else if (STMT_VINFO_LIVE_P (stmt_info)
9374 && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
9375 vec_stmt, cost_vec))
9376 return false;
9378 return true;
9381 /* Make sure the statement is vectorizable. */
9383 opt_result
9384 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
9385 slp_tree node, slp_instance node_instance,
9386 stmt_vector_for_cost *cost_vec)
9388 vec_info *vinfo = stmt_info->vinfo;
9389 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9390 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9391 bool ok;
9392 gimple_seq pattern_def_seq;
9394 if (dump_enabled_p ())
9395 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
9396 stmt_info->stmt);
9398 if (gimple_has_volatile_ops (stmt_info->stmt))
9399 return opt_result::failure_at (stmt_info->stmt,
9400 "not vectorized:"
9401 " stmt has volatile operands: %G\n",
9402 stmt_info->stmt);
9404 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9405 && node == NULL
9406 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9408 gimple_stmt_iterator si;
9410 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9412 stmt_vec_info pattern_def_stmt_info
9413 = vinfo->lookup_stmt (gsi_stmt (si));
9414 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9415 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9417 /* Analyze def stmt of STMT if it's a pattern stmt. */
9418 if (dump_enabled_p ())
9419 dump_printf_loc (MSG_NOTE, vect_location,
9420 "==> examining pattern def statement: %G",
9421 pattern_def_stmt_info->stmt);
9423 opt_result res
9424 = vect_analyze_stmt (pattern_def_stmt_info,
9425 need_to_vectorize, node, node_instance,
9426 cost_vec);
9427 if (!res)
9428 return res;
9433 /* Skip stmts that do not need to be vectorized. In loops this is expected
9434 to include:
9435 - the COND_EXPR which is the loop exit condition
9436 - any LABEL_EXPRs in the loop
9437 - computations that are used only for array indexing or loop control.
9438 In basic blocks we only analyze statements that are a part of some SLP
9439 instance; therefore, all the statements are relevant.
9441 A pattern statement needs to be analyzed instead of the original statement
9442 if the original statement is not relevant. Otherwise, we analyze both
9443 statements. In basic blocks we are called from some SLP instance
9444 traversal; don't analyze pattern stmts instead, since the pattern stmts
9445 are already part of the SLP instance. */
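/* For instance (a hypothetical pattern stmt): if the pattern recognizer
   replaced "w = (int) c1 * (int) c2" by a WIDEN_MULT_EXPR pattern stmt and
   the original stmt is not relevant, only the pattern stmt is analyzed
   below.  */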
9447 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9448 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9449 && !STMT_VINFO_LIVE_P (stmt_info))
9451 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9452 && pattern_stmt_info
9453 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9454 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9456 /* Analyze PATTERN_STMT instead of the original stmt. */
9457 stmt_info = pattern_stmt_info;
9458 if (dump_enabled_p ())
9459 dump_printf_loc (MSG_NOTE, vect_location,
9460 "==> examining pattern statement: %G",
9461 stmt_info->stmt);
9463 else
9465 if (dump_enabled_p ())
9466 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9468 return opt_result::success ();
9471 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9472 && node == NULL
9473 && pattern_stmt_info
9474 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9475 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9477 /* Analyze PATTERN_STMT too. */
9478 if (dump_enabled_p ())
9479 dump_printf_loc (MSG_NOTE, vect_location,
9480 "==> examining pattern statement: %G",
9481 pattern_stmt_info->stmt);
9483 opt_result res
9484 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
9485 node_instance, cost_vec);
9486 if (!res)
9487 return res;
9490 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9492 case vect_internal_def:
9493 break;
9495 case vect_reduction_def:
9496 case vect_nested_cycle:
9497 gcc_assert (!bb_vinfo
9498 && (relevance == vect_used_in_outer
9499 || relevance == vect_used_in_outer_by_reduction
9500 || relevance == vect_used_by_reduction
9501 || relevance == vect_unused_in_scope
9502 || relevance == vect_used_only_live));
9503 break;
9505 case vect_induction_def:
9506 gcc_assert (!bb_vinfo);
9507 break;
9509 case vect_constant_def:
9510 case vect_external_def:
9511 case vect_unknown_def_type:
9512 default:
9513 gcc_unreachable ();
9516 if (STMT_VINFO_RELEVANT_P (stmt_info))
9518 tree type = gimple_expr_type (stmt_info->stmt);
9519 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
9520 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9521 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9522 || (call && gimple_call_lhs (call) == NULL_TREE));
9523 *need_to_vectorize = true;
9526 if (PURE_SLP_STMT (stmt_info) && !node)
9528 if (dump_enabled_p ())
9529 dump_printf_loc (MSG_NOTE, vect_location,
9530 "handled only by SLP analysis\n");
9531 return opt_result::success ();
9534 ok = true;
9535 if (!bb_vinfo
9536 && (STMT_VINFO_RELEVANT_P (stmt_info)
9537 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9538 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
9539 -mveclibabi= takes preference over library functions with
9540 the simd attribute. */
9541 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9542 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9543 cost_vec)
9544 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
9545 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9546 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
9547 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9548 cost_vec)
9549 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9550 || vectorizable_reduction (stmt_info, NULL, NULL, node,
9551 node_instance, cost_vec)
9552 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
9553 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9554 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9555 cost_vec)
9556 || vectorizable_comparison (stmt_info, NULL, NULL, node,
9557 cost_vec));
9558 else
9560 if (bb_vinfo)
9561 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9562 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9563 cost_vec)
9564 || vectorizable_conversion (stmt_info, NULL, NULL, node,
9565 cost_vec)
9566 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9567 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9568 || vectorizable_assignment (stmt_info, NULL, NULL, node,
9569 cost_vec)
9570 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9571 cost_vec)
9572 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9573 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9574 cost_vec)
9575 || vectorizable_comparison (stmt_info, NULL, NULL, node,
9576 cost_vec));
9579 if (!ok)
9580 return opt_result::failure_at (stmt_info->stmt,
9581 "not vectorized:"
9582 " relevant stmt not supported: %G",
9583 stmt_info->stmt);
9585 /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
9586 need extra handling, except for vectorizable reductions. */
9587 if (!bb_vinfo
9588 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9589 && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
9590 return opt_result::failure_at (stmt_info->stmt,
9591 "not vectorized:"
9592 " live stmt not supported: %G",
9593 stmt_info->stmt);
9595 return opt_result::success ();
9599 /* Function vect_transform_stmt.
9601 Create a vectorized stmt to replace STMT_INFO, and insert it at BSI. */
9603 bool
9604 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9605 slp_tree slp_node, slp_instance slp_node_instance)
9607 vec_info *vinfo = stmt_info->vinfo;
9608 bool is_store = false;
9609 stmt_vec_info vec_stmt = NULL;
9610 bool done;
9612 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9613 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
9615 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9616 && nested_in_vect_loop_p
9617 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9618 stmt_info));
9620 gimple *stmt = stmt_info->stmt;
9621 switch (STMT_VINFO_TYPE (stmt_info))
9623 case type_demotion_vec_info_type:
9624 case type_promotion_vec_info_type:
9625 case type_conversion_vec_info_type:
9626 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
9627 NULL);
9628 gcc_assert (done);
9629 break;
9631 case induc_vec_info_type:
9632 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
9633 NULL);
9634 gcc_assert (done);
9635 break;
9637 case shift_vec_info_type:
9638 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9639 gcc_assert (done);
9640 break;
9642 case op_vec_info_type:
9643 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
9644 NULL);
9645 gcc_assert (done);
9646 break;
9648 case assignment_vec_info_type:
9649 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
9650 NULL);
9651 gcc_assert (done);
9652 break;
9654 case load_vec_info_type:
9655 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
9656 slp_node_instance, NULL);
9657 gcc_assert (done);
9658 break;
9660 case store_vec_info_type:
9661 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9662 gcc_assert (done);
9663 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9665 /* In case of interleaving, the whole chain is vectorized when the
9666 last store in the chain is reached. Store stmts before the last
9667 one are skipped, and their vec_stmt_info shouldn't be freed
9668 meanwhile. */
9669 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9670 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9671 is_store = true;
9673 else
9674 is_store = true;
9675 break;
9677 case condition_vec_info_type:
9678 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
9679 slp_node, NULL);
9680 gcc_assert (done);
9681 break;
9683 case comparison_vec_info_type:
9684 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
9685 slp_node, NULL);
9686 gcc_assert (done);
9687 break;
9689 case call_vec_info_type:
9690 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9691 stmt = gsi_stmt (*gsi);
9692 break;
9694 case call_simd_clone_vec_info_type:
9695 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
9696 slp_node, NULL);
9697 stmt = gsi_stmt (*gsi);
9698 break;
9700 case reduc_vec_info_type:
9701 done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
9702 slp_node_instance, NULL);
9703 gcc_assert (done);
9704 break;
9706 default:
9707 if (!STMT_VINFO_LIVE_P (stmt_info))
9709 if (dump_enabled_p ())
9710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9711 "stmt not supported.\n");
9712 gcc_unreachable ();
9716 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9717 This would break hybrid SLP vectorization. */
9718 if (slp_node)
9719 gcc_assert (!vec_stmt
9720 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
9722 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9723 is being vectorized, but outside the immediately enclosing loop. */
9724 if (vec_stmt
9725 && nested_p
9726 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9727 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9728 || STMT_VINFO_RELEVANT (stmt_info) ==
9729 vect_used_in_outer_by_reduction))
9731 struct loop *innerloop = LOOP_VINFO_LOOP (
9732 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9733 imm_use_iterator imm_iter;
9734 use_operand_p use_p;
9735 tree scalar_dest;
9737 if (dump_enabled_p ())
9738 dump_printf_loc (MSG_NOTE, vect_location,
9739 "Record the vdef for outer-loop vectorization.\n");
9741 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9742 (to be used when vectorizing outer-loop stmts that use the DEF of
9743 STMT). */
9744 if (gimple_code (stmt) == GIMPLE_PHI)
9745 scalar_dest = PHI_RESULT (stmt);
9746 else
9747 scalar_dest = gimple_get_lhs (stmt);
9749 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9750 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9752 stmt_vec_info exit_phi_info
9753 = vinfo->lookup_stmt (USE_STMT (use_p));
9754 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9758 /* Handle stmts whose DEF is used outside the loop-nest that is
9759 being vectorized. */
9760 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9762 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
9763 NULL);
9764 gcc_assert (done);
9767 if (vec_stmt)
9768 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9770 return is_store;
9774 /* Remove a group of stores (for SLP or interleaving), free their
9775 stmt_vec_info. */
9777 void
9778 vect_remove_stores (stmt_vec_info first_stmt_info)
9780 vec_info *vinfo = first_stmt_info->vinfo;
9781 stmt_vec_info next_stmt_info = first_stmt_info;
9783 while (next_stmt_info)
9785 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9786 next_stmt_info = vect_orig_stmt (next_stmt_info);
9787 /* Free the attached stmt_vec_info and remove the stmt. */
9788 vinfo->remove_stmt (next_stmt_info);
9789 next_stmt_info = tmp;
9793 /* Function get_vectype_for_scalar_type_and_size.
9795 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9796 by the target. */
9798 tree
9799 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9801 tree orig_scalar_type = scalar_type;
9802 scalar_mode inner_mode;
9803 machine_mode simd_mode;
9804 poly_uint64 nunits;
9805 tree vectype;
9807 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9808 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9809 return NULL_TREE;
9811 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9813 /* For vector types of elements whose mode precision doesn't
9814 match their type's precision we use an element type of mode
9815 precision. The vectorization routines will have to make sure
9816 they support the proper result truncation/extension.
9817 We also make sure to build vector types with INTEGER_TYPE
9818 component type only. */
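/* Illustrative example (assuming the common 8-bit QImode _Bool):
   boolean_type_node typically has TYPE_PRECISION 1 and is not an
   INTEGER_TYPE, so its vector element type becomes the 8-bit unsigned
   type returned by build_nonstandard_integer_type (8, 1).  */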
9819 if (INTEGRAL_TYPE_P (scalar_type)
9820 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9821 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9822 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9823 TYPE_UNSIGNED (scalar_type));
9825 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9826 When the component mode passes the above test simply use a type
9827 corresponding to that mode. The theory is that any use that
9828 would cause problems with this will disable vectorization anyway. */
9829 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9830 && !INTEGRAL_TYPE_P (scalar_type))
9831 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9833 /* We can't build a vector type of elements with alignment bigger than
9834 their size. */
9835 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9836 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9837 TYPE_UNSIGNED (scalar_type));
9839 /* If we fell back to using the mode, fail if there was
9840 no scalar type for it. */
9841 if (scalar_type == NULL_TREE)
9842 return NULL_TREE;
9844 /* If no size was supplied use the mode the target prefers. Otherwise
9845 lookup a vector mode of the specified size. */
9846 if (known_eq (size, 0U))
9847 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9848 else if (!multiple_p (size, nbytes, &nunits)
9849 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9850 return NULL_TREE;
9851 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9852 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9853 return NULL_TREE;
9855 vectype = build_vector_type (scalar_type, nunits);
9857 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9858 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9859 return NULL_TREE;
9861 /* Re-attach the address-space qualifier if we canonicalized the scalar
9862 type. */
9863 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9864 return build_qualified_type
9865 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9867 return vectype;
9870 poly_uint64 current_vector_size;
9872 /* Function get_vectype_for_scalar_type.
9874 Returns the vector type corresponding to SCALAR_TYPE as supported
9875 by the target. */
9877 tree
9878 get_vectype_for_scalar_type (tree scalar_type)
9880 tree vectype;
9881 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9882 current_vector_size);
9883 if (vectype
9884 && known_eq (current_vector_size, 0U))
9885 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9886 return vectype;
9889 /* Function get_mask_type_for_scalar_type.
9891 Returns the mask type corresponding to a result of comparison
9892 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9894 tree
9895 get_mask_type_for_scalar_type (tree scalar_type)
9897 tree vectype = get_vectype_for_scalar_type (scalar_type);
9899 if (!vectype)
9900 return NULL;
9902 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9903 current_vector_size);
9906 /* Function get_same_sized_vectype
9908 Returns a vector type corresponding to SCALAR_TYPE with the same size
9909 as VECTOR_TYPE, if supported by the target. */
9911 tree
9912 get_same_sized_vectype (tree scalar_type, tree vector_type)
9914 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9915 return build_same_sized_truth_vector_type (vector_type);
9917 return get_vectype_for_scalar_type_and_size
9918 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9921 /* Function vect_is_simple_use.
9923 Input:
9924 VINFO - the vect info of the loop or basic block that is being vectorized.
9925 OPERAND - operand in the loop or bb.
9926 Output:
9927 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
9928 case OPERAND is an SSA_NAME that is defined in the vectorizable region
9929 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
9930 the definition could be anywhere in the function
9931 DT - the type of definition
9933 Returns whether a stmt with OPERAND can be vectorized.
9934 For loops, supportable operands are constants, loop invariants, and operands
9935 that are defined by the current iteration of the loop. Unsupportable
9936 operands are those that are defined by a previous iteration of the loop (as
9937 is the case in reduction/induction computations).
9938 For basic blocks, supportable operands are constants and bb invariants.
9939 For now, operands defined outside the basic block are not supported. */
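/* Rough example (hypothetical stmt): for "x_1 = y_2 + 3" inside the
   vectorized loop, the constant 3 yields vect_constant_def, an SSA name
   y_2 defined before the loop yields vect_external_def, and an SSA name
   defined by a vectorizable stmt inside the loop yields the def type
   recorded in its stmt_vec_info (e.g. vect_internal_def).  */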
9941 bool
9942 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
9943 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
9945 if (def_stmt_info_out)
9946 *def_stmt_info_out = NULL;
9947 if (def_stmt_out)
9948 *def_stmt_out = NULL;
9949 *dt = vect_unknown_def_type;
9951 if (dump_enabled_p ())
9953 dump_printf_loc (MSG_NOTE, vect_location,
9954 "vect_is_simple_use: operand ");
9955 if (TREE_CODE (operand) == SSA_NAME
9956 && !SSA_NAME_IS_DEFAULT_DEF (operand))
9957 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
9958 else
9959 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9962 if (CONSTANT_CLASS_P (operand))
9963 *dt = vect_constant_def;
9964 else if (is_gimple_min_invariant (operand))
9965 *dt = vect_external_def;
9966 else if (TREE_CODE (operand) != SSA_NAME)
9967 *dt = vect_unknown_def_type;
9968 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
9969 *dt = vect_external_def;
9970 else
9972 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
9973 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
9974 if (!stmt_vinfo)
9975 *dt = vect_external_def;
9976 else
9978 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
9979 def_stmt = stmt_vinfo->stmt;
9980 switch (gimple_code (def_stmt))
9982 case GIMPLE_PHI:
9983 case GIMPLE_ASSIGN:
9984 case GIMPLE_CALL:
9985 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9986 break;
9987 default:
9988 *dt = vect_unknown_def_type;
9989 break;
9991 if (def_stmt_info_out)
9992 *def_stmt_info_out = stmt_vinfo;
9994 if (def_stmt_out)
9995 *def_stmt_out = def_stmt;
9998 if (dump_enabled_p ())
10000 dump_printf (MSG_NOTE, ", type of def: ");
10001 switch (*dt)
10003 case vect_uninitialized_def:
10004 dump_printf (MSG_NOTE, "uninitialized\n");
10005 break;
10006 case vect_constant_def:
10007 dump_printf (MSG_NOTE, "constant\n");
10008 break;
10009 case vect_external_def:
10010 dump_printf (MSG_NOTE, "external\n");
10011 break;
10012 case vect_internal_def:
10013 dump_printf (MSG_NOTE, "internal\n");
10014 break;
10015 case vect_induction_def:
10016 dump_printf (MSG_NOTE, "induction\n");
10017 break;
10018 case vect_reduction_def:
10019 dump_printf (MSG_NOTE, "reduction\n");
10020 break;
10021 case vect_double_reduction_def:
10022 dump_printf (MSG_NOTE, "double reduction\n");
10023 break;
10024 case vect_nested_cycle:
10025 dump_printf (MSG_NOTE, "nested cycle\n");
10026 break;
10027 case vect_unknown_def_type:
10028 dump_printf (MSG_NOTE, "unknown\n");
10029 break;
10033 if (*dt == vect_unknown_def_type)
10035 if (dump_enabled_p ())
10036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10037 "Unsupported pattern.\n");
10038 return false;
10041 return true;
10044 /* Function vect_is_simple_use.
10046 Same as vect_is_simple_use but also determines the vector operand
10047 type of OPERAND and stores it to *VECTYPE. If the definition of
10048 OPERAND is vect_uninitialized_def, vect_constant_def or
10049 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10050 is responsible to compute the best suited vector type for the
10051 scalar operand. */
10053 bool
10054 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10055 tree *vectype, stmt_vec_info *def_stmt_info_out,
10056 gimple **def_stmt_out)
10058 stmt_vec_info def_stmt_info;
10059 gimple *def_stmt;
10060 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10061 return false;
10063 if (def_stmt_out)
10064 *def_stmt_out = def_stmt;
10065 if (def_stmt_info_out)
10066 *def_stmt_info_out = def_stmt_info;
10068 /* Now get a vector type if the def is internal, otherwise supply
10069 NULL_TREE and leave it up to the caller to figure out a proper
10070 type for the use stmt. */
10071 if (*dt == vect_internal_def
10072 || *dt == vect_induction_def
10073 || *dt == vect_reduction_def
10074 || *dt == vect_double_reduction_def
10075 || *dt == vect_nested_cycle)
10077 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
10078 gcc_assert (*vectype != NULL_TREE);
10079 if (dump_enabled_p ())
10080 dump_printf_loc (MSG_NOTE, vect_location,
10081 "vect_is_simple_use: vectype %T\n", *vectype);
10083 else if (*dt == vect_uninitialized_def
10084 || *dt == vect_constant_def
10085 || *dt == vect_external_def)
10086 *vectype = NULL_TREE;
10087 else
10088 gcc_unreachable ();
10090 return true;
10094 /* Function supportable_widening_operation
10096 Check whether an operation represented by the code CODE is a
10097 widening operation that is supported by the target platform in
10098 vector form (i.e., when operating on arguments of type VECTYPE_IN
10099 producing a result of type VECTYPE_OUT).
10101 Widening operations we currently support are NOP (CONVERT), FLOAT,
10102 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10103 are supported by the target platform either directly (via vector
10104 tree-codes), or via target builtins.
10106 Output:
10107 - CODE1 and CODE2 are codes of vector operations to be used when
10108 vectorizing the operation, if available.
10109 - MULTI_STEP_CVT determines the number of required intermediate steps in
10110 case of multi-step conversion (like char->short->int - in that case
10111 MULTI_STEP_CVT will be 1).
10112 - INTERM_TYPES contains the intermediate type required to perform the
10113 widening operation (short in the above example). */
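/* Illustrative example (assuming the target provides the usual unpack
   optabs): widening a vector of chars to ints goes through shorts, so
   CODE1/CODE2 are VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR (LO/HI swapped on
   big-endian targets), MULTI_STEP_CVT is 1 and INTERM_TYPES contains the
   intermediate short vector type.  */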
10115 bool
10116 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
10117 tree vectype_out, tree vectype_in,
10118 enum tree_code *code1, enum tree_code *code2,
10119 int *multi_step_cvt,
10120 vec<tree> *interm_types)
10122 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10123 struct loop *vect_loop = NULL;
10124 machine_mode vec_mode;
10125 enum insn_code icode1, icode2;
10126 optab optab1, optab2;
10127 tree vectype = vectype_in;
10128 tree wide_vectype = vectype_out;
10129 enum tree_code c1, c2;
10130 int i;
10131 tree prev_type, intermediate_type;
10132 machine_mode intermediate_mode, prev_mode;
10133 optab optab3, optab4;
10135 *multi_step_cvt = 0;
10136 if (loop_info)
10137 vect_loop = LOOP_VINFO_LOOP (loop_info);
10139 switch (code)
10141 case WIDEN_MULT_EXPR:
10142 /* The result of a vectorized widening operation usually requires
10143 two vectors (because the widened results do not fit into one vector).
10144 The generated vector results would normally be expected to be
10145 generated in the same order as in the original scalar computation,
10146 i.e. if 8 results are generated in each vector iteration, they are
10147 to be organized as follows:
10148 vect1: [res1,res2,res3,res4],
10149 vect2: [res5,res6,res7,res8].
10151 However, in the special case that the result of the widening
10152 operation is used in a reduction computation only, the order doesn't
10153 matter (because when vectorizing a reduction we change the order of
10154 the computation). Some targets can take advantage of this and
10155 generate more efficient code. For example, targets like Altivec,
10156 that support widen_mult using a sequence of {mult_even,mult_odd}
10157 generate the following vectors:
10158 vect1: [res1,res3,res5,res7],
10159 vect2: [res2,res4,res6,res8].
10161 When vectorizing outer-loops, we execute the inner-loop sequentially
10162 (each vectorized inner-loop iteration contributes to VF outer-loop
10163 iterations in parallel). We therefore don't allow changing the
10164 order of the computation in the inner-loop during outer-loop
10165 vectorization. */
10166 /* TODO: Another case in which order doesn't *really* matter is when we
10167 widen and then contract again, e.g. (short)((int)x * y >> 8).
10168 Normally, pack_trunc performs an even/odd permute, whereas the
10169 repack from an even/odd expansion would be an interleave, which
10170 would be significantly simpler for e.g. AVX2. */
10171 /* In any case, in order to avoid duplicating the code below, recurse
10172 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10173 are properly set up for the caller. If we fail, we'll continue with
10174 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10175 if (vect_loop
10176 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10177 && !nested_in_vect_loop_p (vect_loop, stmt_info)
10178 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10179 stmt_info, vectype_out,
10180 vectype_in, code1, code2,
10181 multi_step_cvt, interm_types))
10183 /* Elements in a vector with the vect_used_by_reduction property cannot
10184 be reordered if the use chain with this property does not have the
10185 same operation. One such example is s += a * b, where elements
10186 in a and b cannot be reordered. Here we check if the vector defined
10187 by STMT is only directly used in the reduction statement. */
10188 tree lhs = gimple_assign_lhs (stmt_info->stmt);
10189 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10190 if (use_stmt_info
10191 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10192 return true;
10194 c1 = VEC_WIDEN_MULT_LO_EXPR;
10195 c2 = VEC_WIDEN_MULT_HI_EXPR;
10196 break;
10198 case DOT_PROD_EXPR:
10199 c1 = DOT_PROD_EXPR;
10200 c2 = DOT_PROD_EXPR;
10201 break;
10203 case SAD_EXPR:
10204 c1 = SAD_EXPR;
10205 c2 = SAD_EXPR;
10206 break;
10208 case VEC_WIDEN_MULT_EVEN_EXPR:
10209 /* Support the recursion induced just above. */
10210 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10211 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10212 break;
10214 case WIDEN_LSHIFT_EXPR:
10215 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10216 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10217 break;
10219 CASE_CONVERT:
10220 c1 = VEC_UNPACK_LO_EXPR;
10221 c2 = VEC_UNPACK_HI_EXPR;
10222 break;
10224 case FLOAT_EXPR:
10225 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10226 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10227 break;
10229 case FIX_TRUNC_EXPR:
10230 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10231 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10232 break;
10234 default:
10235 gcc_unreachable ();
10238 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10239 std::swap (c1, c2);
10241 if (code == FIX_TRUNC_EXPR)
10243 /* The signedness is determined from the output operand. */
10244 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10245 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10247 else
10249 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10250 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10253 if (!optab1 || !optab2)
10254 return false;
10256 vec_mode = TYPE_MODE (vectype);
10257 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10258 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10259 return false;
10261 *code1 = c1;
10262 *code2 = c2;
10264 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10265 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10266 /* For scalar masks we may have different boolean
10267 vector types having the same QImode. Thus we
10268 add an additional check on the number of elements. */
10269 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10270 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10271 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10273 /* Check if it's a multi-step conversion that can be done using intermediate
10274 types. */
10276 prev_type = vectype;
10277 prev_mode = vec_mode;
10279 if (!CONVERT_EXPR_CODE_P (code))
10280 return false;
10282 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10283 intermediate steps in the promotion sequence. We try
10284 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10285 not. */
10286 interm_types->create (MAX_INTERM_CVT_STEPS);
10287 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10289 intermediate_mode = insn_data[icode1].operand[0].mode;
10290 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10292 intermediate_type = vect_halve_mask_nunits (prev_type);
10293 if (intermediate_mode != TYPE_MODE (intermediate_type))
10294 return false;
10296 else
10297 intermediate_type
10298 = lang_hooks.types.type_for_mode (intermediate_mode,
10299 TYPE_UNSIGNED (prev_type));
10301 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10302 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10304 if (!optab3 || !optab4
10305 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10306 || insn_data[icode1].operand[0].mode != intermediate_mode
10307 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10308 || insn_data[icode2].operand[0].mode != intermediate_mode
10309 || ((icode1 = optab_handler (optab3, intermediate_mode))
10310 == CODE_FOR_nothing)
10311 || ((icode2 = optab_handler (optab4, intermediate_mode))
10312 == CODE_FOR_nothing))
10313 break;
10315 interm_types->quick_push (intermediate_type);
10316 (*multi_step_cvt)++;
10318 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10319 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10320 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10321 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10322 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10324 prev_type = intermediate_type;
10325 prev_mode = intermediate_mode;
10328 interm_types->release ();
10329 return false;
10333 /* Function supportable_narrowing_operation
10335 Check whether an operation represented by the code CODE is a
10336 narrowing operation that is supported by the target platform in
10337 vector form (i.e., when operating on arguments of type VECTYPE_IN
10338 and producing a result of type VECTYPE_OUT).
10340 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10341 and FLOAT. This function checks if these operations are supported by
10342 the target platform directly via vector tree-codes.
10344 Output:
10345 - CODE1 is the code of a vector operation to be used when
10346 vectorizing the operation, if available.
10347 - MULTI_STEP_CVT determines the number of required intermediate steps in
10348 case of multi-step conversion (like int->short->char - in that case
10349 MULTI_STEP_CVT will be 1).
10350 - INTERM_TYPES contains the intermediate type required to perform the
10351 narrowing operation (short in the above example). */
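/* Illustrative example (assuming the target provides VEC_PACK_TRUNC):
   narrowing a vector of ints to chars goes through shorts, so CODE1 is
   VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT is 1 and INTERM_TYPES contains the
   intermediate short vector type.  */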
10353 bool
10354 supportable_narrowing_operation (enum tree_code code,
10355 tree vectype_out, tree vectype_in,
10356 enum tree_code *code1, int *multi_step_cvt,
10357 vec<tree> *interm_types)
10359 machine_mode vec_mode;
10360 enum insn_code icode1;
10361 optab optab1, interm_optab;
10362 tree vectype = vectype_in;
10363 tree narrow_vectype = vectype_out;
10364 enum tree_code c1;
10365 tree intermediate_type, prev_type;
10366 machine_mode intermediate_mode, prev_mode;
10367 int i;
10368 bool uns;
10370 *multi_step_cvt = 0;
10371 switch (code)
10373 CASE_CONVERT:
10374 c1 = VEC_PACK_TRUNC_EXPR;
10375 break;
10377 case FIX_TRUNC_EXPR:
10378 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10379 break;
10381 case FLOAT_EXPR:
10382 c1 = VEC_PACK_FLOAT_EXPR;
10383 break;
10385 default:
10386 gcc_unreachable ();
10389 if (code == FIX_TRUNC_EXPR)
10390 /* The signedness is determined from the output operand. */
10391 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10392 else
10393 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10395 if (!optab1)
10396 return false;
10398 vec_mode = TYPE_MODE (vectype);
10399 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10400 return false;
10402 *code1 = c1;
10404 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10405 /* For scalar masks we may have different boolean
10406 vector types having the same QImode. Thus we
10407 add an additional check on the number of elements. */
10408 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10409 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10410 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10412 if (code == FLOAT_EXPR)
10413 return false;
10415 /* Check if it's a multi-step conversion that can be done using intermediate
10416 types. */
10417 prev_mode = vec_mode;
10418 prev_type = vectype;
10419 if (code == FIX_TRUNC_EXPR)
10420 uns = TYPE_UNSIGNED (vectype_out);
10421 else
10422 uns = TYPE_UNSIGNED (vectype);
10424 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10425 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10426 costly than signed. */
10427 if (code == FIX_TRUNC_EXPR && uns)
10429 enum insn_code icode2;
10431 intermediate_type
10432 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10433 interm_optab
10434 = optab_for_tree_code (c1, intermediate_type, optab_default);
10435 if (interm_optab != unknown_optab
10436 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10437 && insn_data[icode1].operand[0].mode
10438 == insn_data[icode2].operand[0].mode)
10440 uns = false;
10441 optab1 = interm_optab;
10442 icode1 = icode2;
10446 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10447 intermediate steps in the narrowing sequence. We try
10448 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10449 interm_types->create (MAX_INTERM_CVT_STEPS);
10450 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10452 intermediate_mode = insn_data[icode1].operand[0].mode;
10453 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10455 intermediate_type = vect_double_mask_nunits (prev_type);
10456 if (intermediate_mode != TYPE_MODE (intermediate_type))
10457 return false;
10459 else
10460 intermediate_type
10461 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10462 interm_optab
10463 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10464 optab_default);
10465 if (!interm_optab
10466 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10467 || insn_data[icode1].operand[0].mode != intermediate_mode
10468 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10469 == CODE_FOR_nothing))
10470 break;
10472 interm_types->quick_push (intermediate_type);
10473 (*multi_step_cvt)++;
10475 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10476 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10477 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10478 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10480 prev_mode = intermediate_mode;
10481 prev_type = intermediate_type;
10482 optab1 = interm_optab;
10485 interm_types->release ();
10486 return false;
10489 /* Generate and return a statement that sets vector mask MASK such that
10490 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
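/* A small worked example (hypothetical values): with START_INDEX = 6,
   END_INDEX = 9 and an 8-lane MASK, only lanes 0..2 satisfy I + 6 < 9,
   so the generated WHILE_ULT call sets MASK to {1, 1, 1, 0, 0, 0, 0, 0}.  */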
10492 gcall *
10493 vect_gen_while (tree mask, tree start_index, tree end_index)
10495 tree cmp_type = TREE_TYPE (start_index);
10496 tree mask_type = TREE_TYPE (mask);
10497 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10498 cmp_type, mask_type,
10499 OPTIMIZE_FOR_SPEED));
10500 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10501 start_index, end_index,
10502 build_zero_cst (mask_type));
10503 gimple_call_set_lhs (call, mask);
10504 return call;
10507 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10508 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10510 tree
10511 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10512 tree end_index)
10514 tree tmp = make_ssa_name (mask_type);
10515 gcall *call = vect_gen_while (tmp, start_index, end_index);
10516 gimple_seq_add_stmt (seq, call);
10517 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10520 /* Try to compute the vector types required to vectorize STMT_INFO,
10521 returning true on success and false if vectorization isn't possible.
10523 On success:
10525 - Set *STMT_VECTYPE_OUT to:
10526 - NULL_TREE if the statement doesn't need to be vectorized;
10527 - boolean_type_node if the statement is a boolean operation whose
10528 vector type can only be determined once all the other vector types
10529 are known; and
10530 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10532 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10533 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10534 statement does not help to determine the overall number of units. */
10536 opt_result
10537 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10538 tree *stmt_vectype_out,
10539 tree *nunits_vectype_out)
10541 gimple *stmt = stmt_info->stmt;
10543 *stmt_vectype_out = NULL_TREE;
10544 *nunits_vectype_out = NULL_TREE;
10546 if (gimple_get_lhs (stmt) == NULL_TREE
10547 /* MASK_STORE has no lhs, but is ok. */
10548 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10550 if (is_a <gcall *> (stmt))
10552 /* Ignore calls with no lhs. These must be calls to
10553 #pragma omp simd functions, and what vectorization factor
10554 they really need can't be determined until
10555 vectorizable_simd_clone_call. */
10556 if (dump_enabled_p ())
10557 dump_printf_loc (MSG_NOTE, vect_location,
10558 "defer to SIMD clone analysis.\n");
10559 return opt_result::success ();
10562 return opt_result::failure_at (stmt,
10563 "not vectorized: irregular stmt.%G", stmt);
10566 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10567 return opt_result::failure_at (stmt,
10568 "not vectorized: vector stmt in loop:%G",
10569 stmt);
10571 tree vectype;
10572 tree scalar_type = NULL_TREE;
10573 if (STMT_VINFO_VECTYPE (stmt_info))
10574 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10575 else
10577 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10578 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10579 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10580 else
10581 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10583 /* Pure bool ops don't participate in number-of-units computation.
10584 For comparisons use the types being compared. */
10585 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10586 && is_gimple_assign (stmt)
10587 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10589 *stmt_vectype_out = boolean_type_node;
10591 tree rhs1 = gimple_assign_rhs1 (stmt);
10592 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10593 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10594 scalar_type = TREE_TYPE (rhs1);
10595 else
10597 if (dump_enabled_p ())
10598 dump_printf_loc (MSG_NOTE, vect_location,
10599 "pure bool operation.\n");
10600 return opt_result::success ();
10604 if (dump_enabled_p ())
10605 dump_printf_loc (MSG_NOTE, vect_location,
10606 "get vectype for scalar type: %T\n", scalar_type);
10607 vectype = get_vectype_for_scalar_type (scalar_type);
10608 if (!vectype)
10609 return opt_result::failure_at (stmt,
10610 "not vectorized:"
10611 " unsupported data-type %T\n",
10612 scalar_type);
10614 if (!*stmt_vectype_out)
10615 *stmt_vectype_out = vectype;
10617 if (dump_enabled_p ())
10618 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
10621 /* Don't try to compute scalar types if the stmt produces a boolean
10622 vector; use the existing vector type instead. */
10623 tree nunits_vectype;
10624 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10625 nunits_vectype = vectype;
10626 else
10628 /* The number of units is set according to the smallest scalar
10629 type (or the largest vector size, but we only support one
10630 vector size per vectorization). */
10631 if (*stmt_vectype_out != boolean_type_node)
10633 HOST_WIDE_INT dummy;
10634 scalar_type = vect_get_smallest_scalar_type (stmt_info,
10635 &dummy, &dummy);
10637 if (dump_enabled_p ())
10638 dump_printf_loc (MSG_NOTE, vect_location,
10639 "get vectype for scalar type: %T\n", scalar_type);
10640 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10642 if (!nunits_vectype)
10643 return opt_result::failure_at (stmt,
10644 "not vectorized: unsupported data-type %T\n",
10645 scalar_type);
10647 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10648 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10649 return opt_result::failure_at (stmt,
10650 "not vectorized: different sized vector "
10651 "types in statement, %T and %T\n",
10652 vectype, nunits_vectype);
10654 if (dump_enabled_p ())
10656 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
10657 nunits_vectype);
10659 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10660 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10661 dump_printf (MSG_NOTE, "\n");
10664 *nunits_vectype_out = nunits_vectype;
10665 return opt_result::success ();
10668 /* Try to determine the correct vector type for STMT_INFO, which is a
10669 statement that produces a scalar boolean result. Return the vector
10670 type on success, otherwise return NULL_TREE. */
10672 opt_tree
10673 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10675 gimple *stmt = stmt_info->stmt;
10676 tree mask_type = NULL;
10677 tree vectype, scalar_type;
10679 if (is_gimple_assign (stmt)
10680 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10681 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10683 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10684 mask_type = get_mask_type_for_scalar_type (scalar_type);
10686 if (!mask_type)
10687 return opt_tree::failure_at (stmt,
10688 "not vectorized: unsupported mask\n");
10690 else
10692 tree rhs;
10693 ssa_op_iter iter;
10694 enum vect_def_type dt;
10696 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10698 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10699 return opt_tree::failure_at (stmt,
10700 "not vectorized: can't compute mask"
10701 " type for statement, %G", stmt);
10703 /* No vectype probably means an external definition.
10704 Allow it in case there is another operand from which
10705 the mask type can be determined. */
10706 if (!vectype)
10707 continue;
10709 if (!mask_type)
10710 mask_type = vectype;
10711 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10712 TYPE_VECTOR_SUBPARTS (vectype)))
10713 return opt_tree::failure_at (stmt,
10714 "not vectorized: different sized mask"
10715 " types in statement, %T and %T\n",
10716 mask_type, vectype);
10717 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10718 != VECTOR_BOOLEAN_TYPE_P (vectype))
10719 return opt_tree::failure_at (stmt,
10720 "not vectorized: mixed mask and "
10721 "nonmask vector types in statement, "
10722 "%T and %T\n",
10723 mask_type, vectype);
10726 /* We may compare boolean values loaded as a vector of integers.
10727 Fix mask_type in such a case. */
10728 if (mask_type
10729 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10730 && gimple_code (stmt) == GIMPLE_ASSIGN
10731 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10732 mask_type = build_same_sized_truth_vector_type (mask_type);
10735 /* No mask_type should mean a loop-invariant predicate.
10736 This is probably a subject for optimization in if-conversion. */
10737 if (!mask_type)
10738 return opt_tree::failure_at (stmt,
10739 "not vectorized: can't compute mask type "
10740 "for statement: %G", stmt);
10742 return opt_tree::success (mask_type);