* gcc.pot: Regenerate.
[official-gcc.git] / gcc / tree-vect-stmts.c
blob41a7eb0b9a79ba14d000553476e4ef2a13840946
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
58 /* For lang_hooks.types.type_for_mode. */
59 #include "langhooks.h"
61 /* Return the vectorized type for the given statement. */
63 tree
64 stmt_vectype (struct _stmt_vec_info *stmt_info)
66 return STMT_VINFO_VECTYPE (stmt_info);
69 /* Return TRUE iff the given statement is in an inner loop relative to
70 the loop being vectorized. */
71 bool
72 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
74 gimple *stmt = STMT_VINFO_STMT (stmt_info);
75 basic_block bb = gimple_bb (stmt);
76 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
77 struct loop* loop;
79 if (!loop_vinfo)
80 return false;
82 loop = LOOP_VINFO_LOOP (loop_vinfo);
84 return (bb->loop_father == loop->inner);
87 /* Record the cost of a statement, either by directly informing the
88 target model or by saving it in a vector for later processing.
89 Return a preliminary estimate of the statement's cost. */
91 unsigned
92 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
93 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
94 int misalign, enum vect_cost_model_location where)
96 if ((kind == vector_load || kind == unaligned_load)
97 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
98 kind = vector_gather_load;
99 if ((kind == vector_store || kind == unaligned_store)
100 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
101 kind = vector_scatter_store;
103 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
104 body_cost_vec->safe_push (si);
106 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
107 return (unsigned)
108 (builtin_vectorization_cost (kind, vectype, misalign) * count);
111 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113 static tree
114 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
117 "vect_array");
120 /* ARRAY is an array of vectors created by create_vector_array.
121 Return an SSA_NAME for the vector in index N. The reference
122 is part of the vectorization of STMT_INFO and the vector is associated
123 with scalar destination SCALAR_DEST. */
125 static tree
126 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
127 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
129 tree vect_type, vect, vect_name, array_ref;
130 gimple *new_stmt;
132 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
133 vect_type = TREE_TYPE (TREE_TYPE (array));
134 vect = vect_create_destination_var (scalar_dest, vect_type);
135 array_ref = build4 (ARRAY_REF, vect_type, array,
136 build_int_cst (size_type_node, n),
137 NULL_TREE, NULL_TREE);
139 new_stmt = gimple_build_assign (vect, array_ref);
140 vect_name = make_ssa_name (vect, new_stmt);
141 gimple_assign_set_lhs (new_stmt, vect_name);
142 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
144 return vect_name;
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Emit code to store SSA_NAME VECT in index N of the array.
149 The store is part of the vectorization of STMT_INFO. */
151 static void
152 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
153 tree vect, tree array, unsigned HOST_WIDE_INT n)
155 tree array_ref;
156 gimple *new_stmt;
158 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
159 build_int_cst (size_type_node, n),
160 NULL_TREE, NULL_TREE);
162 new_stmt = gimple_build_assign (array_ref, vect);
163 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
166 /* PTR is a pointer to an array of type TYPE. Return a representation
167 of *PTR. The memory reference replaces those in FIRST_DR
168 (and its group). */
170 static tree
171 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
173 tree mem_ref;
175 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
176 /* Arrays have the same alignment as their type. */
177 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
178 return mem_ref;
181 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
182 Emit the clobber before *GSI. */
184 static void
185 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
186 tree var)
188 tree clobber = build_clobber (TREE_TYPE (var));
189 gimple *new_stmt = gimple_build_assign (var, clobber);
190 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
193 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
195 /* Function vect_mark_relevant.
197 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
199 static void
200 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
201 enum vect_relevant relevant, bool live_p)
203 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
204 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
206 if (dump_enabled_p ())
207 dump_printf_loc (MSG_NOTE, vect_location,
208 "mark relevant %d, live %d: %G", relevant, live_p,
209 stmt_info->stmt);
211 /* If this stmt is an original stmt in a pattern, we might need to mark its
212 related pattern stmt instead of the original stmt. However, such stmts
213 may have their own uses that are not in any pattern, in such cases the
214 stmt itself should be marked. */
215 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217 /* This is the last stmt in a sequence that was detected as a
218 pattern that can potentially be vectorized. Don't mark the stmt
219 as relevant/live because it's not going to be vectorized.
220 Instead mark the pattern-stmt that replaces it. */
222 if (dump_enabled_p ())
223 dump_printf_loc (MSG_NOTE, vect_location,
224 "last stmt in pattern. don't mark"
225 " relevant/live.\n");
226 stmt_vec_info old_stmt_info = stmt_info;
227 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
228 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
229 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
230 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
233 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
234 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
235 STMT_VINFO_RELEVANT (stmt_info) = relevant;
237 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
238 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
240 if (dump_enabled_p ())
241 dump_printf_loc (MSG_NOTE, vect_location,
242 "already marked relevant/live.\n");
243 return;
246 worklist->safe_push (stmt_info);
250 /* Function is_simple_and_all_uses_invariant
252 Return true if STMT_INFO is simple and all uses of it are invariant. */
254 bool
255 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
256 loop_vec_info loop_vinfo)
258 tree op;
259 ssa_op_iter iter;
261 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
262 if (!stmt)
263 return false;
265 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
267 enum vect_def_type dt = vect_uninitialized_def;
269 if (!vect_is_simple_use (op, loop_vinfo, &dt))
271 if (dump_enabled_p ())
272 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
273 "use not simple.\n");
274 return false;
277 if (dt != vect_external_def && dt != vect_constant_def)
278 return false;
280 return true;
283 /* Function vect_stmt_relevant_p.
285 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
286 is "relevant for vectorization".
288 A stmt is considered "relevant for vectorization" if:
289 - it has uses outside the loop.
290 - it has vdefs (it alters memory).
291 - control stmts in the loop (except for the exit condition).
293 CHECKME: what other side effects would the vectorizer allow? */
295 static bool
296 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
297 enum vect_relevant *relevant, bool *live_p)
299 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
300 ssa_op_iter op_iter;
301 imm_use_iterator imm_iter;
302 use_operand_p use_p;
303 def_operand_p def_p;
305 *relevant = vect_unused_in_scope;
306 *live_p = false;
308 /* cond stmt other than loop exit cond. */
309 if (is_ctrl_stmt (stmt_info->stmt)
310 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
311 *relevant = vect_used_in_scope;
313 /* changing memory. */
314 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
315 if (gimple_vdef (stmt_info->stmt)
316 && !gimple_clobber_p (stmt_info->stmt))
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE, vect_location,
320 "vec_stmt_relevant_p: stmt has vdefs.\n");
321 *relevant = vect_used_in_scope;
324 /* uses outside the loop. */
325 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
327 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
329 basic_block bb = gimple_bb (USE_STMT (use_p));
330 if (!flow_bb_inside_loop_p (loop, bb))
332 if (dump_enabled_p ())
333 dump_printf_loc (MSG_NOTE, vect_location,
334 "vec_stmt_relevant_p: used out of loop.\n");
336 if (is_gimple_debug (USE_STMT (use_p)))
337 continue;
339 /* We expect all such uses to be in the loop exit phis
340 (because of loop closed form) */
341 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
342 gcc_assert (bb == single_exit (loop)->dest);
344 *live_p = true;
349 if (*live_p && *relevant == vect_unused_in_scope
350 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
352 if (dump_enabled_p ())
353 dump_printf_loc (MSG_NOTE, vect_location,
354 "vec_stmt_relevant_p: stmt live but not relevant.\n");
355 *relevant = vect_used_only_live;
358 return (*live_p || *relevant);
362 /* Function exist_non_indexing_operands_for_use_p
364 USE is one of the uses attached to STMT_INFO. Check if USE is
365 used in STMT_INFO for anything other than indexing an array. */
367 static bool
368 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
370 tree operand;
372 /* USE corresponds to some operand in STMT. If there is no data
373 reference in STMT, then any operand that corresponds to USE
374 is not indexing an array. */
375 if (!STMT_VINFO_DATA_REF (stmt_info))
376 return true;
378 /* STMT has a data_ref. FORNOW this means that its of one of
379 the following forms:
380 -1- ARRAY_REF = var
381 -2- var = ARRAY_REF
382 (This should have been verified in analyze_data_refs).
384 'var' in the second case corresponds to a def, not a use,
385 so USE cannot correspond to any operands that are not used
386 for array indexing.
388 Therefore, all we need to check is if STMT falls into the
389 first case, and whether var corresponds to USE. */
391 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
392 if (!assign || !gimple_assign_copy_p (assign))
394 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
395 if (call && gimple_call_internal_p (call))
397 internal_fn ifn = gimple_call_internal_fn (call);
398 int mask_index = internal_fn_mask_index (ifn);
399 if (mask_index >= 0
400 && use == gimple_call_arg (call, mask_index))
401 return true;
402 int stored_value_index = internal_fn_stored_value_index (ifn);
403 if (stored_value_index >= 0
404 && use == gimple_call_arg (call, stored_value_index))
405 return true;
406 if (internal_gather_scatter_fn_p (ifn)
407 && use == gimple_call_arg (call, 1))
408 return true;
410 return false;
413 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
414 return false;
415 operand = gimple_assign_rhs1 (assign);
416 if (TREE_CODE (operand) != SSA_NAME)
417 return false;
419 if (operand == use)
420 return true;
422 return false;
427 Function process_use.
429 Inputs:
430 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
431 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
432 that defined USE. This is done by calling mark_relevant and passing it
433 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
434 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
435 be performed.
437 Outputs:
438 Generally, LIVE_P and RELEVANT are used to define the liveness and
439 relevance info of the DEF_STMT of this USE:
440 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
441 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
442 Exceptions:
443 - case 1: If USE is used only for address computations (e.g. array indexing),
444 which does not need to be directly vectorized, then the liveness/relevance
445 of the respective DEF_STMT is left unchanged.
446 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
447 we skip DEF_STMT cause it had already been processed.
448 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
449 "relevant" will be modified accordingly.
451 Return true if everything is as expected. Return false otherwise. */
453 static opt_result
454 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
455 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
456 bool force)
458 stmt_vec_info dstmt_vinfo;
459 basic_block bb, def_bb;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
465 return opt_result::success ();
467 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
468 return opt_result::failure_at (stmt_vinfo->stmt,
469 "not vectorized:"
470 " unsupported use in stmt.\n");
472 if (!dstmt_vinfo)
473 return opt_result::success ();
475 def_bb = gimple_bb (dstmt_vinfo->stmt);
477 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
478 DSTMT_VINFO must have already been processed, because this should be the
479 only way that STMT, which is a reduction-phi, was put in the worklist,
480 as there should be no other uses for DSTMT_VINFO in the loop. So we just
481 check that everything is as expected, and we are done. */
482 bb = gimple_bb (stmt_vinfo->stmt);
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
493 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
494 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
495 return opt_result::success ();
498 /* case 3a: outer-loop stmt defining an inner-loop stmt:
499 outer-loop-header-bb:
500 d = dstmt_vinfo
501 inner-loop:
502 stmt # use (d)
503 outer-loop-tail-bb:
504 ... */
505 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE, vect_location,
509 "outer-loop def-stmt defining inner-loop stmt.\n");
511 switch (relevant)
513 case vect_unused_in_scope:
514 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
515 vect_used_in_scope : vect_unused_in_scope;
516 break;
518 case vect_used_in_outer_by_reduction:
519 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
520 relevant = vect_used_by_reduction;
521 break;
523 case vect_used_in_outer:
524 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
525 relevant = vect_used_in_scope;
526 break;
528 case vect_used_in_scope:
529 break;
531 default:
532 gcc_unreachable ();
536 /* case 3b: inner-loop stmt defining an outer-loop stmt:
537 outer-loop-header-bb:
539 inner-loop:
540 d = dstmt_vinfo
541 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
542 stmt # use (d) */
543 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
545 if (dump_enabled_p ())
546 dump_printf_loc (MSG_NOTE, vect_location,
547 "inner-loop def-stmt defining outer-loop stmt.\n");
549 switch (relevant)
551 case vect_unused_in_scope:
552 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
553 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
554 vect_used_in_outer_by_reduction : vect_unused_in_scope;
555 break;
557 case vect_used_by_reduction:
558 case vect_used_only_live:
559 relevant = vect_used_in_outer_by_reduction;
560 break;
562 case vect_used_in_scope:
563 relevant = vect_used_in_outer;
564 break;
566 default:
567 gcc_unreachable ();
570 /* We are also not interested in uses on loop PHI backedges that are
571 inductions. Otherwise we'll needlessly vectorize the IV increment
572 and cause hybrid SLP for SLP inductions. Unless the PHI is live
573 of course. */
574 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
575 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
576 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
577 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
578 loop_latch_edge (bb->loop_father))
579 == use))
581 if (dump_enabled_p ())
582 dump_printf_loc (MSG_NOTE, vect_location,
583 "induction value on backedge.\n");
584 return opt_result::success ();
588 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
589 return opt_result::success ();
593 /* Function vect_mark_stmts_to_be_vectorized.
595 Not all stmts in the loop need to be vectorized. For example:
597 for i...
598 for j...
599 1. T0 = i + j
600 2. T1 = a[T0]
602 3. j = j + 1
604 Stmt 1 and 3 do not need to be vectorized, because loop control and
605 addressing of vectorized data-refs are handled differently.
607 This pass detects such stmts. */
609 opt_result
610 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
612 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
613 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
614 unsigned int nbbs = loop->num_nodes;
615 gimple_stmt_iterator si;
616 unsigned int i;
617 basic_block bb;
618 bool live_p;
619 enum vect_relevant relevant;
621 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
623 auto_vec<stmt_vec_info, 64> worklist;
625 /* 1. Init worklist. */
626 for (i = 0; i < nbbs; i++)
628 bb = bbs[i];
629 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
631 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
632 if (dump_enabled_p ())
633 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
634 phi_info->stmt);
636 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
637 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
639 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
654 use_operand_p use_p;
655 ssa_op_iter iter;
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
706 default:
707 break;
710 if (is_pattern_stmt_p (stmt_vinfo))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
734 for (; i < gimple_num_ops (assign); i++)
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
749 for (i = 0; i < gimple_call_num_args (call); i++)
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
780 return res;
782 } /* while worklist */
784 return opt_result::success ();
787 /* Compute the prologue cost for invariant or constant operands. */
789 static unsigned
790 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
791 unsigned opno, enum vect_def_type dt,
792 stmt_vector_for_cost *cost_vec)
794 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
795 tree op = gimple_op (stmt, opno);
796 unsigned prologue_cost = 0;
798 /* Without looking at the actual initializer a vector of
799 constants can be implemented as load from the constant pool.
800 When all elements are the same we can use a splat. */
801 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
802 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
803 unsigned num_vects_to_check;
804 unsigned HOST_WIDE_INT const_nunits;
805 unsigned nelt_limit;
806 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
807 && ! multiple_p (const_nunits, group_size))
809 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
810 nelt_limit = const_nunits;
812 else
814 /* If either the vector has variable length or the vectors
815 are composed of repeated whole groups we only need to
816 cost construction once. All vectors will be the same. */
817 num_vects_to_check = 1;
818 nelt_limit = group_size;
820 tree elt = NULL_TREE;
821 unsigned nelt = 0;
822 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
824 unsigned si = j % group_size;
825 if (nelt == 0)
826 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
827 /* ??? We're just tracking whether all operands of a single
828 vector initializer are the same, ideally we'd check if
829 we emitted the same one already. */
830 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
831 opno))
832 elt = NULL_TREE;
833 nelt++;
834 if (nelt == nelt_limit)
836 /* ??? We need to pass down stmt_info for a vector type
837 even if it points to the wrong stmt. */
838 prologue_cost += record_stmt_cost
839 (cost_vec, 1,
840 dt == vect_external_def
841 ? (elt ? scalar_to_vec : vec_construct)
842 : vector_load,
843 stmt_info, 0, vect_prologue);
844 nelt = 0;
848 return prologue_cost;
851 /* Function vect_model_simple_cost.
853 Models cost for simple operations, i.e. those that only emit ncopies of a
854 single op. Right now, this does not account for multiple insns that could
855 be generated for the single vector op. We will handle that shortly. */
857 static void
858 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
859 enum vect_def_type *dt,
860 int ndts,
861 slp_tree node,
862 stmt_vector_for_cost *cost_vec)
864 int inside_cost = 0, prologue_cost = 0;
866 gcc_assert (cost_vec != NULL);
868 /* ??? Somehow we need to fix this at the callers. */
869 if (node)
870 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
872 if (node)
874 /* Scan operands and account for prologue cost of constants/externals.
875 ??? This over-estimates cost for multiple uses and should be
876 re-engineered. */
877 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
878 tree lhs = gimple_get_lhs (stmt);
879 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
881 tree op = gimple_op (stmt, i);
882 enum vect_def_type dt;
883 if (!op || op == lhs)
884 continue;
885 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
886 && (dt == vect_constant_def || dt == vect_external_def))
887 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
888 i, dt, cost_vec);
891 else
892 /* Cost the "broadcast" of a scalar operand in to a vector operand.
893 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
894 cost model. */
895 for (int i = 0; i < ndts; i++)
896 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
898 stmt_info, 0, vect_prologue);
900 /* Adjust for two-operator SLP nodes. */
901 if (node && SLP_TREE_TWO_OPERATORS (node))
903 ncopies *= 2;
904 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
905 stmt_info, 0, vect_body);
908 /* Pass the inside-of-loop statements to the target-specific cost model. */
909 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
910 stmt_info, 0, vect_body);
912 if (dump_enabled_p ())
913 dump_printf_loc (MSG_NOTE, vect_location,
914 "vect_model_simple_cost: inside_cost = %d, "
915 "prologue_cost = %d .\n", inside_cost, prologue_cost);
919 /* Model cost for type demotion and promotion operations. PWR is normally
920 zero for single-step promotions and demotions. It will be one if
921 two-step promotion/demotion is required, and so on. Each additional
922 step doubles the number of instructions required. */
924 static void
925 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
926 enum vect_def_type *dt, int pwr,
927 stmt_vector_for_cost *cost_vec)
929 int i, tmp;
930 int inside_cost = 0, prologue_cost = 0;
932 for (i = 0; i < pwr + 1; i++)
934 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
935 (i + 1) : i;
936 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
937 vec_promote_demote, stmt_info, 0,
938 vect_body);
941 /* FORNOW: Assuming maximum 2 args per stmts. */
942 for (i = 0; i < 2; i++)
943 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
944 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
945 stmt_info, 0, vect_prologue);
947 if (dump_enabled_p ())
948 dump_printf_loc (MSG_NOTE, vect_location,
949 "vect_model_promotion_demotion_cost: inside_cost = %d, "
950 "prologue_cost = %d .\n", inside_cost, prologue_cost);
953 /* Returns true if the current function returns DECL. */
955 static bool
956 cfun_returns (tree decl)
958 edge_iterator ei;
959 edge e;
960 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
962 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
963 if (!ret)
964 continue;
965 if (gimple_return_retval (ret) == decl)
966 return true;
967 /* We often end up with an aggregate copy to the result decl,
968 handle that case as well. First skip intermediate clobbers
969 though. */
970 gimple *def = ret;
973 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
975 while (gimple_clobber_p (def));
976 if (is_a <gassign *> (def)
977 && gimple_assign_lhs (def) == gimple_return_retval (ret)
978 && gimple_assign_rhs1 (def) == decl)
979 return true;
981 return false;
984 /* Function vect_model_store_cost
986 Models cost for stores. In the case of grouped accesses, one access
987 has the overhead of the grouped access attributed to it. */
989 static void
990 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
991 enum vect_def_type dt,
992 vect_memory_access_type memory_access_type,
993 vec_load_store_type vls_type, slp_tree slp_node,
994 stmt_vector_for_cost *cost_vec)
996 unsigned int inside_cost = 0, prologue_cost = 0;
997 stmt_vec_info first_stmt_info = stmt_info;
998 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1000 /* ??? Somehow we need to fix this at the callers. */
1001 if (slp_node)
1002 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1004 if (vls_type == VLS_STORE_INVARIANT)
1006 if (slp_node)
1007 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1008 1, dt, cost_vec);
1009 else
1010 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1011 stmt_info, 0, vect_prologue);
1014 /* Grouped stores update all elements in the group at once,
1015 so we want the DR for the first statement. */
1016 if (!slp_node && grouped_access_p)
1017 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1019 /* True if we should include any once-per-group costs as well as
1020 the cost of the statement itself. For SLP we only get called
1021 once per group anyhow. */
1022 bool first_stmt_p = (first_stmt_info == stmt_info);
1024 /* We assume that the cost of a single store-lanes instruction is
1025 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1026 access is instead being provided by a permute-and-store operation,
1027 include the cost of the permutes. */
1028 if (first_stmt_p
1029 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1031 /* Uses a high and low interleave or shuffle operations for each
1032 needed permute. */
1033 int group_size = DR_GROUP_SIZE (first_stmt_info);
1034 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1035 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1036 stmt_info, 0, vect_body);
1038 if (dump_enabled_p ())
1039 dump_printf_loc (MSG_NOTE, vect_location,
1040 "vect_model_store_cost: strided group_size = %d .\n",
1041 group_size);
1044 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1045 /* Costs of the stores. */
1046 if (memory_access_type == VMAT_ELEMENTWISE
1047 || memory_access_type == VMAT_GATHER_SCATTER)
1049 /* N scalar stores plus extracting the elements. */
1050 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1051 inside_cost += record_stmt_cost (cost_vec,
1052 ncopies * assumed_nunits,
1053 scalar_store, stmt_info, 0, vect_body);
1055 else
1056 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1058 if (memory_access_type == VMAT_ELEMENTWISE
1059 || memory_access_type == VMAT_STRIDED_SLP)
1061 /* N scalar stores plus extracting the elements. */
1062 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1063 inside_cost += record_stmt_cost (cost_vec,
1064 ncopies * assumed_nunits,
1065 vec_to_scalar, stmt_info, 0, vect_body);
1068 /* When vectorizing a store into the function result assign
1069 a penalty if the function returns in a multi-register location.
1070 In this case we assume we'll end up with having to spill the
1071 vector result and do piecewise loads as a conservative estimate. */
1072 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1073 if (base
1074 && (TREE_CODE (base) == RESULT_DECL
1075 || (DECL_P (base) && cfun_returns (base)))
1076 && !aggregate_value_p (base, cfun->decl))
1078 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1079 /* ??? Handle PARALLEL in some way. */
1080 if (REG_P (reg))
1082 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1083 /* Assume that a single reg-reg move is possible and cheap,
1084 do not account for vector to gp register move cost. */
1085 if (nregs > 1)
1087 /* Spill. */
1088 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1089 vector_store,
1090 stmt_info, 0, vect_epilogue);
1091 /* Loads. */
1092 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1093 scalar_load,
1094 stmt_info, 0, vect_epilogue);
1099 if (dump_enabled_p ())
1100 dump_printf_loc (MSG_NOTE, vect_location,
1101 "vect_model_store_cost: inside_cost = %d, "
1102 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1106 /* Calculate cost of DR's memory access. */
1107 void
1108 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1109 unsigned int *inside_cost,
1110 stmt_vector_for_cost *body_cost_vec)
1112 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1113 int alignment_support_scheme
1114 = vect_supportable_dr_alignment (dr_info, false);
1116 switch (alignment_support_scheme)
1118 case dr_aligned:
1120 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1121 vector_store, stmt_info, 0,
1122 vect_body);
1124 if (dump_enabled_p ())
1125 dump_printf_loc (MSG_NOTE, vect_location,
1126 "vect_model_store_cost: aligned.\n");
1127 break;
1130 case dr_unaligned_supported:
1132 /* Here, we assign an additional cost for the unaligned store. */
1133 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1134 unaligned_store, stmt_info,
1135 DR_MISALIGNMENT (dr_info),
1136 vect_body);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_store_cost: unaligned supported by "
1140 "hardware.\n");
1141 break;
1144 case dr_unaligned_unsupported:
1146 *inside_cost = VECT_MAX_COST;
1148 if (dump_enabled_p ())
1149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1150 "vect_model_store_cost: unsupported access.\n");
1151 break;
1154 default:
1155 gcc_unreachable ();
1160 /* Function vect_model_load_cost
1162 Models cost for loads. In the case of grouped accesses, one access has
1163 the overhead of the grouped access attributed to it. Since unaligned
1164 accesses are supported for loads, we also account for the costs of the
1165 access scheme chosen. */
1167 static void
1168 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1169 vect_memory_access_type memory_access_type,
1170 slp_instance instance,
1171 slp_tree slp_node,
1172 stmt_vector_for_cost *cost_vec)
1174 unsigned int inside_cost = 0, prologue_cost = 0;
1175 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1177 gcc_assert (cost_vec);
1179 /* ??? Somehow we need to fix this at the callers. */
1180 if (slp_node)
1181 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1183 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1185 /* If the load is permuted then the alignment is determined by
1186 the first group element not by the first scalar stmt DR. */
1187 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1188 /* Record the cost for the permutation. */
1189 unsigned n_perms;
1190 unsigned assumed_nunits
1191 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1192 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1193 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1194 slp_vf, instance, true,
1195 &n_perms);
1196 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1197 first_stmt_info, 0, vect_body);
1198 /* And adjust the number of loads performed. This handles
1199 redundancies as well as loads that are later dead. */
1200 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1201 bitmap_clear (perm);
1202 for (unsigned i = 0;
1203 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1204 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1205 ncopies = 0;
1206 bool load_seen = false;
1207 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1209 if (i % assumed_nunits == 0)
1211 if (load_seen)
1212 ncopies++;
1213 load_seen = false;
1215 if (bitmap_bit_p (perm, i))
1216 load_seen = true;
1218 if (load_seen)
1219 ncopies++;
1220 gcc_assert (ncopies
1221 <= (DR_GROUP_SIZE (first_stmt_info)
1222 - DR_GROUP_GAP (first_stmt_info)
1223 + assumed_nunits - 1) / assumed_nunits);
1226 /* Grouped loads read all elements in the group at once,
1227 so we want the DR for the first statement. */
1228 stmt_vec_info first_stmt_info = stmt_info;
1229 if (!slp_node && grouped_access_p)
1230 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1232 /* True if we should include any once-per-group costs as well as
1233 the cost of the statement itself. For SLP we only get called
1234 once per group anyhow. */
1235 bool first_stmt_p = (first_stmt_info == stmt_info);
1237 /* We assume that the cost of a single load-lanes instruction is
1238 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1239 access is instead being provided by a load-and-permute operation,
1240 include the cost of the permutes. */
1241 if (first_stmt_p
1242 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1244 /* Uses an even and odd extract operations or shuffle operations
1245 for each needed permute. */
1246 int group_size = DR_GROUP_SIZE (first_stmt_info);
1247 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1248 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1249 stmt_info, 0, vect_body);
1251 if (dump_enabled_p ())
1252 dump_printf_loc (MSG_NOTE, vect_location,
1253 "vect_model_load_cost: strided group_size = %d .\n",
1254 group_size);
1257 /* The loads themselves. */
1258 if (memory_access_type == VMAT_ELEMENTWISE
1259 || memory_access_type == VMAT_GATHER_SCATTER)
1261 /* N scalar loads plus gathering them into a vector. */
1262 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1263 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1264 inside_cost += record_stmt_cost (cost_vec,
1265 ncopies * assumed_nunits,
1266 scalar_load, stmt_info, 0, vect_body);
1268 else
1269 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1270 &inside_cost, &prologue_cost,
1271 cost_vec, cost_vec, true);
1272 if (memory_access_type == VMAT_ELEMENTWISE
1273 || memory_access_type == VMAT_STRIDED_SLP)
1274 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1275 stmt_info, 0, vect_body);
1277 if (dump_enabled_p ())
1278 dump_printf_loc (MSG_NOTE, vect_location,
1279 "vect_model_load_cost: inside_cost = %d, "
1280 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1284 /* Calculate cost of DR's memory access. */
1285 void
1286 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1287 bool add_realign_cost, unsigned int *inside_cost,
1288 unsigned int *prologue_cost,
1289 stmt_vector_for_cost *prologue_cost_vec,
1290 stmt_vector_for_cost *body_cost_vec,
1291 bool record_prologue_costs)
1293 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1294 int alignment_support_scheme
1295 = vect_supportable_dr_alignment (dr_info, false);
1297 switch (alignment_support_scheme)
1299 case dr_aligned:
1301 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1302 stmt_info, 0, vect_body);
1304 if (dump_enabled_p ())
1305 dump_printf_loc (MSG_NOTE, vect_location,
1306 "vect_model_load_cost: aligned.\n");
1308 break;
1310 case dr_unaligned_supported:
1312 /* Here, we assign an additional cost for the unaligned load. */
1313 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1314 unaligned_load, stmt_info,
1315 DR_MISALIGNMENT (dr_info),
1316 vect_body);
1318 if (dump_enabled_p ())
1319 dump_printf_loc (MSG_NOTE, vect_location,
1320 "vect_model_load_cost: unaligned supported by "
1321 "hardware.\n");
1323 break;
1325 case dr_explicit_realign:
1327 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1328 vector_load, stmt_info, 0, vect_body);
1329 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1330 vec_perm, stmt_info, 0, vect_body);
1332 /* FIXME: If the misalignment remains fixed across the iterations of
1333 the containing loop, the following cost should be added to the
1334 prologue costs. */
1335 if (targetm.vectorize.builtin_mask_for_load)
1336 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1337 stmt_info, 0, vect_body);
1339 if (dump_enabled_p ())
1340 dump_printf_loc (MSG_NOTE, vect_location,
1341 "vect_model_load_cost: explicit realign\n");
1343 break;
1345 case dr_explicit_realign_optimized:
1347 if (dump_enabled_p ())
1348 dump_printf_loc (MSG_NOTE, vect_location,
1349 "vect_model_load_cost: unaligned software "
1350 "pipelined.\n");
1352 /* Unaligned software pipeline has a load of an address, an initial
1353 load, and possibly a mask operation to "prime" the loop. However,
1354 if this is an access in a group of loads, which provide grouped
1355 access, then the above cost should only be considered for one
1356 access in the group. Inside the loop, there is a load op
1357 and a realignment op. */
1359 if (add_realign_cost && record_prologue_costs)
1361 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1362 vector_stmt, stmt_info,
1363 0, vect_prologue);
1364 if (targetm.vectorize.builtin_mask_for_load)
1365 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1366 vector_stmt, stmt_info,
1367 0, vect_prologue);
1370 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1371 stmt_info, 0, vect_body);
1372 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1373 stmt_info, 0, vect_body);
1375 if (dump_enabled_p ())
1376 dump_printf_loc (MSG_NOTE, vect_location,
1377 "vect_model_load_cost: explicit realign optimized"
1378 "\n");
1380 break;
1383 case dr_unaligned_unsupported:
1385 *inside_cost = VECT_MAX_COST;
1387 if (dump_enabled_p ())
1388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1389 "vect_model_load_cost: unsupported access.\n");
1390 break;
1393 default:
1394 gcc_unreachable ();
1398 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1399 the loop preheader for the vectorized stmt STMT_VINFO. */
1401 static void
1402 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1403 gimple_stmt_iterator *gsi)
1405 if (gsi)
1406 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1407 else
1409 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1411 if (loop_vinfo)
1413 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1414 basic_block new_bb;
1415 edge pe;
1417 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1418 loop = loop->inner;
1420 pe = loop_preheader_edge (loop);
1421 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1422 gcc_assert (!new_bb);
1424 else
1426 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1427 basic_block bb;
1428 gimple_stmt_iterator gsi_bb_start;
1430 gcc_assert (bb_vinfo);
1431 bb = BB_VINFO_BB (bb_vinfo);
1432 gsi_bb_start = gsi_after_labels (bb);
1433 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1437 if (dump_enabled_p ())
1438 dump_printf_loc (MSG_NOTE, vect_location,
1439 "created new init_stmt: %G", new_stmt);
1442 /* Function vect_init_vector.
1444 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1445 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1446 vector type a vector with all elements equal to VAL is created first.
1447 Place the initialization at BSI if it is not NULL. Otherwise, place the
1448 initialization at the loop preheader.
1449 Return the DEF of INIT_STMT.
1450 It will be used in the vectorization of STMT_INFO. */
1452 tree
1453 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1454 gimple_stmt_iterator *gsi)
1456 gimple *init_stmt;
1457 tree new_temp;
1459 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1460 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1462 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1463 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1465 /* Scalar boolean value should be transformed into
1466 all zeros or all ones value before building a vector. */
1467 if (VECTOR_BOOLEAN_TYPE_P (type))
1469 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1470 tree false_val = build_zero_cst (TREE_TYPE (type));
1472 if (CONSTANT_CLASS_P (val))
1473 val = integer_zerop (val) ? false_val : true_val;
1474 else
1476 new_temp = make_ssa_name (TREE_TYPE (type));
1477 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1478 val, true_val, false_val);
1479 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1480 val = new_temp;
1483 else if (CONSTANT_CLASS_P (val))
1484 val = fold_convert (TREE_TYPE (type), val);
1485 else
1487 new_temp = make_ssa_name (TREE_TYPE (type));
1488 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1489 init_stmt = gimple_build_assign (new_temp,
1490 fold_build1 (VIEW_CONVERT_EXPR,
1491 TREE_TYPE (type),
1492 val));
1493 else
1494 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1495 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1496 val = new_temp;
1499 val = build_vector_from_val (type, val);
1502 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1503 init_stmt = gimple_build_assign (new_temp, val);
1504 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1505 return new_temp;
1508 /* Function vect_get_vec_def_for_operand_1.
1510 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1511 with type DT that will be used in the vectorized stmt. */
1513 tree
1514 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1515 enum vect_def_type dt)
1517 tree vec_oprnd;
1518 stmt_vec_info vec_stmt_info;
1520 switch (dt)
1522 /* operand is a constant or a loop invariant. */
1523 case vect_constant_def:
1524 case vect_external_def:
1525 /* Code should use vect_get_vec_def_for_operand. */
1526 gcc_unreachable ();
1528 /* Operand is defined by a loop header phi. In case of nested
1529 cycles we also may have uses of the backedge def. */
1530 case vect_reduction_def:
1531 case vect_double_reduction_def:
1532 case vect_nested_cycle:
1533 case vect_induction_def:
1534 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1535 || dt == vect_nested_cycle);
1536 /* Fallthru. */
1538 /* operand is defined inside the loop. */
1539 case vect_internal_def:
1541 /* Get the def from the vectorized stmt. */
1542 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1543 /* Get vectorized pattern statement. */
1544 if (!vec_stmt_info
1545 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1546 && !STMT_VINFO_RELEVANT (def_stmt_info))
1547 vec_stmt_info = (STMT_VINFO_VEC_STMT
1548 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1549 gcc_assert (vec_stmt_info);
1550 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1551 vec_oprnd = PHI_RESULT (phi);
1552 else
1553 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1554 return vec_oprnd;
1557 default:
1558 gcc_unreachable ();
1563 /* Function vect_get_vec_def_for_operand.
1565 OP is an operand in STMT_VINFO. This function returns a (vector) def
1566 that will be used in the vectorized stmt for STMT_VINFO.
1568 In the case that OP is an SSA_NAME which is defined in the loop, then
1569 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1571 In case OP is an invariant or constant, a new stmt that creates a vector def
1572 needs to be introduced. VECTYPE may be used to specify a required type for
1573 vector invariant. */
1575 tree
1576 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1578 gimple *def_stmt;
1579 enum vect_def_type dt;
1580 bool is_simple_use;
1581 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1583 if (dump_enabled_p ())
1584 dump_printf_loc (MSG_NOTE, vect_location,
1585 "vect_get_vec_def_for_operand: %T\n", op);
1587 stmt_vec_info def_stmt_info;
1588 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1589 &def_stmt_info, &def_stmt);
1590 gcc_assert (is_simple_use);
1591 if (def_stmt && dump_enabled_p ())
1592 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1594 if (dt == vect_constant_def || dt == vect_external_def)
1596 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1597 tree vector_type;
1599 if (vectype)
1600 vector_type = vectype;
1601 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1602 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1603 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1604 else
1605 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1607 gcc_assert (vector_type);
1608 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1610 else
1611 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1615 /* Function vect_get_vec_def_for_stmt_copy
1617 Return a vector-def for an operand. This function is used when the
1618 vectorized stmt to be created (by the caller to this function) is a "copy"
1619 created in case the vectorized result cannot fit in one vector, and several
1620 copies of the vector-stmt are required. In this case the vector-def is
1621 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1622 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1624 Context:
1625 In case the vectorization factor (VF) is bigger than the number
1626 of elements that can fit in a vectype (nunits), we have to generate
1627 more than one vector stmt to vectorize the scalar stmt. This situation
1628 arises when there are multiple data-types operated upon in the loop; the
1629 smallest data-type determines the VF, and as a result, when vectorizing
1630 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1631 vector stmt (each computing a vector of 'nunits' results, and together
1632 computing 'VF' results in each iteration). This function is called when
1633 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1634 which VF=16 and nunits=4, so the number of copies required is 4):
1636 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1638 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1639 VS1.1: vx.1 = memref1 VS1.2
1640 VS1.2: vx.2 = memref2 VS1.3
1641 VS1.3: vx.3 = memref3
1643 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1644 VSnew.1: vz1 = vx.1 + ... VSnew.2
1645 VSnew.2: vz2 = vx.2 + ... VSnew.3
1646 VSnew.3: vz3 = vx.3 + ...
1648 The vectorization of S1 is explained in vectorizable_load.
1649 The vectorization of S2:
1650 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1651 the function 'vect_get_vec_def_for_operand' is called to
1652 get the relevant vector-def for each operand of S2. For operand x it
1653 returns the vector-def 'vx.0'.
1655 To create the remaining copies of the vector-stmt (VSnew.j), this
1656 function is called to get the relevant vector-def for each operand. It is
1657 obtained from the respective VS1.j stmt, which is recorded in the
1658 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1660 For example, to obtain the vector-def 'vx.1' in order to create the
1661 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1662 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1663 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1664 and return its def ('vx.1').
1665 Overall, to create the above sequence this function will be called 3 times:
1666 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1667 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1668 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1670 tree
1671 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1673 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1674 if (!def_stmt_info)
1675 /* Do nothing; can reuse same def. */
1676 return vec_oprnd;
1678 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1679 gcc_assert (def_stmt_info);
1680 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1681 vec_oprnd = PHI_RESULT (phi);
1682 else
1683 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1684 return vec_oprnd;
1688 /* Get vectorized definitions for the operands to create a copy of an original
1689 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1691 void
1692 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1693 vec<tree> *vec_oprnds0,
1694 vec<tree> *vec_oprnds1)
1696 tree vec_oprnd = vec_oprnds0->pop ();
1698 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1699 vec_oprnds0->quick_push (vec_oprnd);
1701 if (vec_oprnds1 && vec_oprnds1->length ())
1703 vec_oprnd = vec_oprnds1->pop ();
1704 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1705 vec_oprnds1->quick_push (vec_oprnd);
1710 /* Get vectorized definitions for OP0 and OP1. */
1712 void
1713 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1714 vec<tree> *vec_oprnds0,
1715 vec<tree> *vec_oprnds1,
1716 slp_tree slp_node)
1718 if (slp_node)
1720 int nops = (op1 == NULL_TREE) ? 1 : 2;
1721 auto_vec<tree> ops (nops);
1722 auto_vec<vec<tree> > vec_defs (nops);
1724 ops.quick_push (op0);
1725 if (op1)
1726 ops.quick_push (op1);
1728 vect_get_slp_defs (ops, slp_node, &vec_defs);
1730 *vec_oprnds0 = vec_defs[0];
1731 if (op1)
1732 *vec_oprnds1 = vec_defs[1];
1734 else
1736 tree vec_oprnd;
1738 vec_oprnds0->create (1);
1739 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1740 vec_oprnds0->quick_push (vec_oprnd);
1742 if (op1)
1744 vec_oprnds1->create (1);
1745 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1746 vec_oprnds1->quick_push (vec_oprnd);
1751 /* Helper function called by vect_finish_replace_stmt and
1752 vect_finish_stmt_generation. Set the location of the new
1753 statement and create and return a stmt_vec_info for it. */
1755 static stmt_vec_info
1756 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1758 vec_info *vinfo = stmt_info->vinfo;
1760 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1762 if (dump_enabled_p ())
1763 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1765 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1767 /* While EH edges will generally prevent vectorization, stmt might
1768 e.g. be in a must-not-throw region. Ensure newly created stmts
1769 that could throw are part of the same region. */
1770 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1771 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1772 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1774 return vec_stmt_info;
1777 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1778 which sets the same scalar result as STMT_INFO did. Create and return a
1779 stmt_vec_info for VEC_STMT. */
1781 stmt_vec_info
1782 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1784 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1786 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1787 gsi_replace (&gsi, vec_stmt, true);
1789 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1792 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1793 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1795 stmt_vec_info
1796 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1797 gimple_stmt_iterator *gsi)
1799 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1801 if (!gsi_end_p (*gsi)
1802 && gimple_has_mem_ops (vec_stmt))
1804 gimple *at_stmt = gsi_stmt (*gsi);
1805 tree vuse = gimple_vuse (at_stmt);
1806 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1808 tree vdef = gimple_vdef (at_stmt);
1809 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1810 /* If we have an SSA vuse and insert a store, update virtual
1811 SSA form to avoid triggering the renamer. Do so only
1812 if we can easily see all uses - which is what almost always
1813 happens with the way vectorized stmts are inserted. */
1814 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1815 && ((is_gimple_assign (vec_stmt)
1816 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1817 || (is_gimple_call (vec_stmt)
1818 && !(gimple_call_flags (vec_stmt)
1819 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1821 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1822 gimple_set_vdef (vec_stmt, new_vdef);
1823 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1827 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1828 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1831 /* We want to vectorize a call to combined function CFN with function
1832 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1833 as the types of all inputs. Check whether this is possible using
1834 an internal function, returning its code if so or IFN_LAST if not. */
1836 static internal_fn
1837 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1838 tree vectype_out, tree vectype_in)
1840 internal_fn ifn;
1841 if (internal_fn_p (cfn))
1842 ifn = as_internal_fn (cfn);
1843 else
1844 ifn = associated_internal_fn (fndecl);
1845 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1847 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1848 if (info.vectorizable)
1850 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1851 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1852 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1853 OPTIMIZE_FOR_SPEED))
1854 return ifn;
1857 return IFN_LAST;
1861 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1862 gimple_stmt_iterator *);
1864 /* Check whether a load or store statement in the loop described by
1865 LOOP_VINFO is possible in a fully-masked loop. This is testing
1866 whether the vectorizer pass has the appropriate support, as well as
1867 whether the target does.
1869 VLS_TYPE says whether the statement is a load or store and VECTYPE
1870 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1871 says how the load or store is going to be implemented and GROUP_SIZE
1872 is the number of load or store statements in the containing group.
1873 If the access is a gather load or scatter store, GS_INFO describes
1874 its arguments.
1876 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1877 supported, otherwise record the required mask types. */
1879 static void
1880 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1881 vec_load_store_type vls_type, int group_size,
1882 vect_memory_access_type memory_access_type,
1883 gather_scatter_info *gs_info)
1885 /* Invariant loads need no special support. */
1886 if (memory_access_type == VMAT_INVARIANT)
1887 return;
1889 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1890 machine_mode vecmode = TYPE_MODE (vectype);
1891 bool is_load = (vls_type == VLS_LOAD);
1892 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1894 if (is_load
1895 ? !vect_load_lanes_supported (vectype, group_size, true)
1896 : !vect_store_lanes_supported (vectype, group_size, true))
1898 if (dump_enabled_p ())
1899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1900 "can't use a fully-masked loop because the"
1901 " target doesn't have an appropriate masked"
1902 " load/store-lanes instruction.\n");
1903 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1904 return;
1906 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1907 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1908 return;
1911 if (memory_access_type == VMAT_GATHER_SCATTER)
1913 internal_fn ifn = (is_load
1914 ? IFN_MASK_GATHER_LOAD
1915 : IFN_MASK_SCATTER_STORE);
1916 tree offset_type = TREE_TYPE (gs_info->offset);
1917 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1918 gs_info->memory_type,
1919 TYPE_SIGN (offset_type),
1920 gs_info->scale))
1922 if (dump_enabled_p ())
1923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1924 "can't use a fully-masked loop because the"
1925 " target doesn't have an appropriate masked"
1926 " gather load or scatter store instruction.\n");
1927 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1928 return;
1930 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1931 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1932 return;
1935 if (memory_access_type != VMAT_CONTIGUOUS
1936 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1938 /* Element X of the data must come from iteration i * VF + X of the
1939 scalar loop. We need more work to support other mappings. */
1940 if (dump_enabled_p ())
1941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1942 "can't use a fully-masked loop because an access"
1943 " isn't contiguous.\n");
1944 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1945 return;
1948 machine_mode mask_mode;
1949 if (!(targetm.vectorize.get_mask_mode
1950 (GET_MODE_NUNITS (vecmode),
1951 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1952 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1954 if (dump_enabled_p ())
1955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1956 "can't use a fully-masked loop because the target"
1957 " doesn't have the appropriate masked load or"
1958 " store.\n");
1959 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1960 return;
1962 /* We might load more scalars than we need for permuting SLP loads.
1963 We checked in get_group_load_store_type that the extra elements
1964 don't leak into a new vector. */
1965 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1966 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1967 unsigned int nvectors;
1968 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1969 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1970 else
1971 gcc_unreachable ();
1974 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1975 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1976 that needs to be applied to all loads and stores in a vectorized loop.
1977 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1979 MASK_TYPE is the type of both masks. If new statements are needed,
1980 insert them before GSI. */
1982 static tree
1983 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1984 gimple_stmt_iterator *gsi)
1986 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1987 if (!loop_mask)
1988 return vec_mask;
1990 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1991 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1992 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1993 vec_mask, loop_mask);
1994 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1995 return and_res;
1998 /* Determine whether we can use a gather load or scatter store to vectorize
1999 strided load or store STMT_INFO by truncating the current offset to a
2000 smaller width. We need to be able to construct an offset vector:
2002 { 0, X, X*2, X*3, ... }
2004 without loss of precision, where X is STMT_INFO's DR_STEP.
2006 Return true if this is possible, describing the gather load or scatter
2007 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
2009 static bool
2010 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2011 loop_vec_info loop_vinfo, bool masked_p,
2012 gather_scatter_info *gs_info)
2014 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2015 data_reference *dr = dr_info->dr;
2016 tree step = DR_STEP (dr);
2017 if (TREE_CODE (step) != INTEGER_CST)
2019 /* ??? Perhaps we could use range information here? */
2020 if (dump_enabled_p ())
2021 dump_printf_loc (MSG_NOTE, vect_location,
2022 "cannot truncate variable step.\n");
2023 return false;
2026 /* Get the number of bits in an element. */
2027 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2028 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2029 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2031 /* Set COUNT to the upper limit on the number of elements - 1.
2032 Start with the maximum vectorization factor. */
2033 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2035 /* Try lowering COUNT to the number of scalar latch iterations. */
2036 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2037 widest_int max_iters;
2038 if (max_loop_iterations (loop, &max_iters)
2039 && max_iters < count)
2040 count = max_iters.to_shwi ();
2042 /* Try scales of 1 and the element size. */
2043 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2044 wi::overflow_type overflow = wi::OVF_NONE;
2045 for (int i = 0; i < 2; ++i)
2047 int scale = scales[i];
2048 widest_int factor;
2049 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2050 continue;
2052 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
2053 in OFFSET_BITS bits. */
2054 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2055 if (overflow)
2056 continue;
2057 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2058 if (wi::min_precision (range, sign) > element_bits)
2060 overflow = wi::OVF_UNKNOWN;
2061 continue;
2064 /* See whether the target supports the operation. */
2065 tree memory_type = TREE_TYPE (DR_REF (dr));
2066 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2067 memory_type, element_bits, sign, scale,
2068 &gs_info->ifn, &gs_info->element_type))
2069 continue;
2071 tree offset_type = build_nonstandard_integer_type (element_bits,
2072 sign == UNSIGNED);
2074 gs_info->decl = NULL_TREE;
2075 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2076 but we don't need to store that here. */
2077 gs_info->base = NULL_TREE;
2078 gs_info->offset = fold_convert (offset_type, step);
2079 gs_info->offset_dt = vect_constant_def;
2080 gs_info->offset_vectype = NULL_TREE;
2081 gs_info->scale = scale;
2082 gs_info->memory_type = memory_type;
2083 return true;
2086 if (overflow && dump_enabled_p ())
2087 dump_printf_loc (MSG_NOTE, vect_location,
2088 "truncating gather/scatter offset to %d bits"
2089 " might change its value.\n", element_bits);
2091 return false;
2094 /* Return true if we can use gather/scatter internal functions to
2095 vectorize STMT_INFO, which is a grouped or strided load or store.
2096 MASKED_P is true if load or store is conditional. When returning
2097 true, fill in GS_INFO with the information required to perform the
2098 operation. */
2100 static bool
2101 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2102 loop_vec_info loop_vinfo, bool masked_p,
2103 gather_scatter_info *gs_info)
2105 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2106 || gs_info->decl)
2107 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2108 masked_p, gs_info);
2110 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2111 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2112 tree offset_type = TREE_TYPE (gs_info->offset);
2113 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2115 /* Enforced by vect_check_gather_scatter. */
2116 gcc_assert (element_bits >= offset_bits);
2118 /* If the elements are wider than the offset, convert the offset to the
2119 same width, without changing its sign. */
2120 if (element_bits > offset_bits)
2122 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2123 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2124 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2127 if (dump_enabled_p ())
2128 dump_printf_loc (MSG_NOTE, vect_location,
2129 "using gather/scatter for strided/grouped access,"
2130 " scale = %d\n", gs_info->scale);
2132 return true;
2135 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2136 elements with a known constant step. Return -1 if that step
2137 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2139 static int
2140 compare_step_with_zero (stmt_vec_info stmt_info)
2142 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2143 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2144 size_zero_node);
2147 /* If the target supports a permute mask that reverses the elements in
2148 a vector of type VECTYPE, return that mask, otherwise return null. */
2150 static tree
2151 perm_mask_for_reverse (tree vectype)
2153 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2155 /* The encoding has a single stepped pattern. */
2156 vec_perm_builder sel (nunits, 1, 3);
2157 for (int i = 0; i < 3; ++i)
2158 sel.quick_push (nunits - 1 - i);
2160 vec_perm_indices indices (sel, 1, nunits);
2161 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2162 return NULL_TREE;
2163 return vect_gen_perm_mask_checked (vectype, indices);
2166 /* STMT_INFO is either a masked or unconditional store. Return the value
2167 being stored. */
2169 tree
2170 vect_get_store_rhs (stmt_vec_info stmt_info)
2172 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2174 gcc_assert (gimple_assign_single_p (assign));
2175 return gimple_assign_rhs1 (assign);
2177 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2179 internal_fn ifn = gimple_call_internal_fn (call);
2180 int index = internal_fn_stored_value_index (ifn);
2181 gcc_assert (index >= 0);
2182 return gimple_call_arg (call, index);
2184 gcc_unreachable ();
2187 /* A subroutine of get_load_store_type, with a subset of the same
2188 arguments. Handle the case where STMT_INFO is part of a grouped load
2189 or store.
2191 For stores, the statements in the group are all consecutive
2192 and there is no gap at the end. For loads, the statements in the
2193 group might not be consecutive; there can be gaps between statements
2194 as well as at the end. */
2196 static bool
2197 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2198 bool masked_p, vec_load_store_type vls_type,
2199 vect_memory_access_type *memory_access_type,
2200 gather_scatter_info *gs_info)
2202 vec_info *vinfo = stmt_info->vinfo;
2203 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2204 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2205 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2206 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2207 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2208 bool single_element_p = (stmt_info == first_stmt_info
2209 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2210 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2211 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2213 /* True if the vectorized statements would access beyond the last
2214 statement in the group. */
2215 bool overrun_p = false;
2217 /* True if we can cope with such overrun by peeling for gaps, so that
2218 there is at least one final scalar iteration after the vector loop. */
2219 bool can_overrun_p = (!masked_p
2220 && vls_type == VLS_LOAD
2221 && loop_vinfo
2222 && !loop->inner);
2224 /* There can only be a gap at the end of the group if the stride is
2225 known at compile time. */
2226 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2228 /* Stores can't yet have gaps. */
2229 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2231 if (slp)
2233 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2235 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2236 separated by the stride, until we have a complete vector.
2237 Fall back to scalar accesses if that isn't possible. */
2238 if (multiple_p (nunits, group_size))
2239 *memory_access_type = VMAT_STRIDED_SLP;
2240 else
2241 *memory_access_type = VMAT_ELEMENTWISE;
2243 else
2245 overrun_p = loop_vinfo && gap != 0;
2246 if (overrun_p && vls_type != VLS_LOAD)
2248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2249 "Grouped store with gaps requires"
2250 " non-consecutive accesses\n");
2251 return false;
2253 /* An overrun is fine if the trailing elements are smaller
2254 than the alignment boundary B. Every vector access will
2255 be a multiple of B and so we are guaranteed to access a
2256 non-gap element in the same B-sized block. */
2257 if (overrun_p
2258 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2259 / vect_get_scalar_dr_size (first_dr_info)))
2260 overrun_p = false;
2261 if (overrun_p && !can_overrun_p)
2263 if (dump_enabled_p ())
2264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2265 "Peeling for outer loop is not supported\n");
2266 return false;
2268 *memory_access_type = VMAT_CONTIGUOUS;
2271 else
2273 /* We can always handle this case using elementwise accesses,
2274 but see if something more efficient is available. */
2275 *memory_access_type = VMAT_ELEMENTWISE;
2277 /* If there is a gap at the end of the group then these optimizations
2278 would access excess elements in the last iteration. */
2279 bool would_overrun_p = (gap != 0);
2280 /* An overrun is fine if the trailing elements are smaller than the
2281 alignment boundary B. Every vector access will be a multiple of B
2282 and so we are guaranteed to access a non-gap element in the
2283 same B-sized block. */
2284 if (would_overrun_p
2285 && !masked_p
2286 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2287 / vect_get_scalar_dr_size (first_dr_info)))
2288 would_overrun_p = false;
2290 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2291 && (can_overrun_p || !would_overrun_p)
2292 && compare_step_with_zero (stmt_info) > 0)
2294 /* First cope with the degenerate case of a single-element
2295 vector. */
2296 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2297 *memory_access_type = VMAT_CONTIGUOUS;
2299 /* Otherwise try using LOAD/STORE_LANES. */
2300 if (*memory_access_type == VMAT_ELEMENTWISE
2301 && (vls_type == VLS_LOAD
2302 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2303 : vect_store_lanes_supported (vectype, group_size,
2304 masked_p)))
2306 *memory_access_type = VMAT_LOAD_STORE_LANES;
2307 overrun_p = would_overrun_p;
2310 /* If that fails, try using permuting loads. */
2311 if (*memory_access_type == VMAT_ELEMENTWISE
2312 && (vls_type == VLS_LOAD
2313 ? vect_grouped_load_supported (vectype, single_element_p,
2314 group_size)
2315 : vect_grouped_store_supported (vectype, group_size)))
2317 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2318 overrun_p = would_overrun_p;
2322 /* As a last resort, trying using a gather load or scatter store.
2324 ??? Although the code can handle all group sizes correctly,
2325 it probably isn't a win to use separate strided accesses based
2326 on nearby locations. Or, even if it's a win over scalar code,
2327 it might not be a win over vectorizing at a lower VF, if that
2328 allows us to use contiguous accesses. */
2329 if (*memory_access_type == VMAT_ELEMENTWISE
2330 && single_element_p
2331 && loop_vinfo
2332 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2333 masked_p, gs_info))
2334 *memory_access_type = VMAT_GATHER_SCATTER;
2337 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2339 /* STMT is the leader of the group. Check the operands of all the
2340 stmts of the group. */
2341 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2342 while (next_stmt_info)
2344 tree op = vect_get_store_rhs (next_stmt_info);
2345 enum vect_def_type dt;
2346 if (!vect_is_simple_use (op, vinfo, &dt))
2348 if (dump_enabled_p ())
2349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2350 "use not simple.\n");
2351 return false;
2353 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2357 if (overrun_p)
2359 gcc_assert (can_overrun_p);
2360 if (dump_enabled_p ())
2361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2362 "Data access with gaps requires scalar "
2363 "epilogue loop\n");
2364 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2367 return true;
2370 /* A subroutine of get_load_store_type, with a subset of the same
2371 arguments. Handle the case where STMT_INFO is a load or store that
2372 accesses consecutive elements with a negative step. */
2374 static vect_memory_access_type
2375 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2376 vec_load_store_type vls_type,
2377 unsigned int ncopies)
2379 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2380 dr_alignment_support alignment_support_scheme;
2382 if (ncopies > 1)
2384 if (dump_enabled_p ())
2385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2386 "multiple types with negative step.\n");
2387 return VMAT_ELEMENTWISE;
2390 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2391 if (alignment_support_scheme != dr_aligned
2392 && alignment_support_scheme != dr_unaligned_supported)
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2396 "negative step but alignment required.\n");
2397 return VMAT_ELEMENTWISE;
2400 if (vls_type == VLS_STORE_INVARIANT)
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_NOTE, vect_location,
2404 "negative step with invariant source;"
2405 " no permute needed.\n");
2406 return VMAT_CONTIGUOUS_DOWN;
2409 if (!perm_mask_for_reverse (vectype))
2411 if (dump_enabled_p ())
2412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2413 "negative step and reversing not supported.\n");
2414 return VMAT_ELEMENTWISE;
2417 return VMAT_CONTIGUOUS_REVERSE;
2420 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2421 if there is a memory access type that the vectorized form can use,
2422 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2423 or scatters, fill in GS_INFO accordingly.
2425 SLP says whether we're performing SLP rather than loop vectorization.
2426 MASKED_P is true if the statement is conditional on a vectorized mask.
2427 VECTYPE is the vector type that the vectorized statements will use.
2428 NCOPIES is the number of vector statements that will be needed. */
2430 static bool
2431 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2432 bool masked_p, vec_load_store_type vls_type,
2433 unsigned int ncopies,
2434 vect_memory_access_type *memory_access_type,
2435 gather_scatter_info *gs_info)
2437 vec_info *vinfo = stmt_info->vinfo;
2438 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2439 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2440 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2442 *memory_access_type = VMAT_GATHER_SCATTER;
2443 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2444 gcc_unreachable ();
2445 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2446 &gs_info->offset_dt,
2447 &gs_info->offset_vectype))
2449 if (dump_enabled_p ())
2450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2451 "%s index use not simple.\n",
2452 vls_type == VLS_LOAD ? "gather" : "scatter");
2453 return false;
2456 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2458 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2459 vls_type, memory_access_type, gs_info))
2460 return false;
2462 else if (STMT_VINFO_STRIDED_P (stmt_info))
2464 gcc_assert (!slp);
2465 if (loop_vinfo
2466 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2467 masked_p, gs_info))
2468 *memory_access_type = VMAT_GATHER_SCATTER;
2469 else
2470 *memory_access_type = VMAT_ELEMENTWISE;
2472 else
2474 int cmp = compare_step_with_zero (stmt_info);
2475 if (cmp < 0)
2476 *memory_access_type = get_negative_load_store_type
2477 (stmt_info, vectype, vls_type, ncopies);
2478 else if (cmp == 0)
2480 gcc_assert (vls_type == VLS_LOAD);
2481 *memory_access_type = VMAT_INVARIANT;
2483 else
2484 *memory_access_type = VMAT_CONTIGUOUS;
2487 if ((*memory_access_type == VMAT_ELEMENTWISE
2488 || *memory_access_type == VMAT_STRIDED_SLP)
2489 && !nunits.is_constant ())
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2493 "Not using elementwise accesses due to variable "
2494 "vectorization factor.\n");
2495 return false;
2498 /* FIXME: At the moment the cost model seems to underestimate the
2499 cost of using elementwise accesses. This check preserves the
2500 traditional behavior until that can be fixed. */
2501 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2502 if (!first_stmt_info)
2503 first_stmt_info = stmt_info;
2504 if (*memory_access_type == VMAT_ELEMENTWISE
2505 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2506 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2507 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2508 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2510 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2512 "not falling back to elementwise accesses\n");
2513 return false;
2515 return true;
2518 /* Return true if boolean argument MASK is suitable for vectorizing
2519 conditional load or store STMT_INFO. When returning true, store the type
2520 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2521 in *MASK_VECTYPE_OUT. */
2523 static bool
2524 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2525 vect_def_type *mask_dt_out,
2526 tree *mask_vectype_out)
2528 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2530 if (dump_enabled_p ())
2531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2532 "mask argument is not a boolean.\n");
2533 return false;
2536 if (TREE_CODE (mask) != SSA_NAME)
2538 if (dump_enabled_p ())
2539 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2540 "mask argument is not an SSA name.\n");
2541 return false;
2544 enum vect_def_type mask_dt;
2545 tree mask_vectype;
2546 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2548 if (dump_enabled_p ())
2549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2550 "mask use not simple.\n");
2551 return false;
2554 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2555 if (!mask_vectype)
2556 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2558 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2560 if (dump_enabled_p ())
2561 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2562 "could not find an appropriate vector mask type.\n");
2563 return false;
2566 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2567 TYPE_VECTOR_SUBPARTS (vectype)))
2569 if (dump_enabled_p ())
2570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2571 "vector mask type %T",
2572 " does not match vector data type %T.\n",
2573 mask_vectype, vectype);
2575 return false;
2578 *mask_dt_out = mask_dt;
2579 *mask_vectype_out = mask_vectype;
2580 return true;
2583 /* Return true if stored value RHS is suitable for vectorizing store
2584 statement STMT_INFO. When returning true, store the type of the
2585 definition in *RHS_DT_OUT, the type of the vectorized store value in
2586 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2588 static bool
2589 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2590 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2591 vec_load_store_type *vls_type_out)
2593 /* In the case this is a store from a constant make sure
2594 native_encode_expr can handle it. */
2595 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2597 if (dump_enabled_p ())
2598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2599 "cannot encode constant as a byte sequence.\n");
2600 return false;
2603 enum vect_def_type rhs_dt;
2604 tree rhs_vectype;
2605 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2607 if (dump_enabled_p ())
2608 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2609 "use not simple.\n");
2610 return false;
2613 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2614 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2616 if (dump_enabled_p ())
2617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2618 "incompatible vector types.\n");
2619 return false;
2622 *rhs_dt_out = rhs_dt;
2623 *rhs_vectype_out = rhs_vectype;
2624 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2625 *vls_type_out = VLS_STORE_INVARIANT;
2626 else
2627 *vls_type_out = VLS_STORE;
2628 return true;
2631 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2632 Note that we support masks with floating-point type, in which case the
2633 floats are interpreted as a bitmask. */
2635 static tree
2636 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2638 if (TREE_CODE (masktype) == INTEGER_TYPE)
2639 return build_int_cst (masktype, -1);
2640 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2642 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2643 mask = build_vector_from_val (masktype, mask);
2644 return vect_init_vector (stmt_info, mask, masktype, NULL);
2646 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2648 REAL_VALUE_TYPE r;
2649 long tmp[6];
2650 for (int j = 0; j < 6; ++j)
2651 tmp[j] = -1;
2652 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2653 tree mask = build_real (TREE_TYPE (masktype), r);
2654 mask = build_vector_from_val (masktype, mask);
2655 return vect_init_vector (stmt_info, mask, masktype, NULL);
2657 gcc_unreachable ();
2660 /* Build an all-zero merge value of type VECTYPE while vectorizing
2661 STMT_INFO as a gather load. */
2663 static tree
2664 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2666 tree merge;
2667 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2668 merge = build_int_cst (TREE_TYPE (vectype), 0);
2669 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2671 REAL_VALUE_TYPE r;
2672 long tmp[6];
2673 for (int j = 0; j < 6; ++j)
2674 tmp[j] = 0;
2675 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2676 merge = build_real (TREE_TYPE (vectype), r);
2678 else
2679 gcc_unreachable ();
2680 merge = build_vector_from_val (vectype, merge);
2681 return vect_init_vector (stmt_info, merge, vectype, NULL);
2684 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2685 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2686 the gather load operation. If the load is conditional, MASK is the
2687 unvectorized condition and MASK_DT is its definition type, otherwise
2688 MASK is null. */
2690 static void
2691 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2692 gimple_stmt_iterator *gsi,
2693 stmt_vec_info *vec_stmt,
2694 gather_scatter_info *gs_info,
2695 tree mask)
2697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2698 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2699 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2700 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2701 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2702 edge pe = loop_preheader_edge (loop);
2703 enum { NARROW, NONE, WIDEN } modifier;
2704 poly_uint64 gather_off_nunits
2705 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2707 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2708 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2709 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2710 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2711 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2712 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2713 tree scaletype = TREE_VALUE (arglist);
2714 tree real_masktype = masktype;
2715 gcc_checking_assert (types_compatible_p (srctype, rettype)
2716 && (!mask
2717 || TREE_CODE (masktype) == INTEGER_TYPE
2718 || types_compatible_p (srctype, masktype)));
2719 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2720 masktype = build_same_sized_truth_vector_type (srctype);
2722 tree mask_halftype = masktype;
2723 tree perm_mask = NULL_TREE;
2724 tree mask_perm_mask = NULL_TREE;
2725 if (known_eq (nunits, gather_off_nunits))
2726 modifier = NONE;
2727 else if (known_eq (nunits * 2, gather_off_nunits))
2729 modifier = WIDEN;
2731 /* Currently widening gathers and scatters are only supported for
2732 fixed-length vectors. */
2733 int count = gather_off_nunits.to_constant ();
2734 vec_perm_builder sel (count, count, 1);
2735 for (int i = 0; i < count; ++i)
2736 sel.quick_push (i | (count / 2));
2738 vec_perm_indices indices (sel, 1, count);
2739 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2740 indices);
2742 else if (known_eq (nunits, gather_off_nunits * 2))
2744 modifier = NARROW;
2746 /* Currently narrowing gathers and scatters are only supported for
2747 fixed-length vectors. */
2748 int count = nunits.to_constant ();
2749 vec_perm_builder sel (count, count, 1);
2750 sel.quick_grow (count);
2751 for (int i = 0; i < count; ++i)
2752 sel[i] = i < count / 2 ? i : i + count / 2;
2753 vec_perm_indices indices (sel, 2, count);
2754 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2756 ncopies *= 2;
2758 if (mask && masktype == real_masktype)
2760 for (int i = 0; i < count; ++i)
2761 sel[i] = i | (count / 2);
2762 indices.new_vector (sel, 2, count);
2763 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2765 else if (mask)
2766 mask_halftype
2767 = build_same_sized_truth_vector_type (gs_info->offset_vectype);
2769 else
2770 gcc_unreachable ();
2772 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2773 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2775 tree ptr = fold_convert (ptrtype, gs_info->base);
2776 if (!is_gimple_min_invariant (ptr))
2778 gimple_seq seq;
2779 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2780 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2781 gcc_assert (!new_bb);
2784 tree scale = build_int_cst (scaletype, gs_info->scale);
2786 tree vec_oprnd0 = NULL_TREE;
2787 tree vec_mask = NULL_TREE;
2788 tree src_op = NULL_TREE;
2789 tree mask_op = NULL_TREE;
2790 tree prev_res = NULL_TREE;
2791 stmt_vec_info prev_stmt_info = NULL;
2793 if (!mask)
2795 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2796 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2799 for (int j = 0; j < ncopies; ++j)
2801 tree op, var;
2802 if (modifier == WIDEN && (j & 1))
2803 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2804 perm_mask, stmt_info, gsi);
2805 else if (j == 0)
2806 op = vec_oprnd0
2807 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2808 else
2809 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2810 vec_oprnd0);
2812 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2814 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2815 TYPE_VECTOR_SUBPARTS (idxtype)));
2816 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2817 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2818 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2819 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2820 op = var;
2823 if (mask)
2825 if (mask_perm_mask && (j & 1))
2826 mask_op = permute_vec_elements (mask_op, mask_op,
2827 mask_perm_mask, stmt_info, gsi);
2828 else
2830 if (j == 0)
2831 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2832 else if (modifier != NARROW || (j & 1) == 0)
2833 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2834 vec_mask);
2836 mask_op = vec_mask;
2837 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2839 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2840 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2841 gcc_assert (known_eq (sub1, sub2));
2842 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2843 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2844 gassign *new_stmt
2845 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2846 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2847 mask_op = var;
2850 if (modifier == NARROW && masktype != real_masktype)
2852 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2853 gassign *new_stmt
2854 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2855 : VEC_UNPACK_LO_EXPR,
2856 mask_op);
2857 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2858 mask_op = var;
2860 src_op = mask_op;
2863 tree mask_arg = mask_op;
2864 if (masktype != real_masktype)
2866 tree utype, optype = TREE_TYPE (mask_op);
2867 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2868 utype = real_masktype;
2869 else
2870 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2871 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2872 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2873 gassign *new_stmt
2874 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2875 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2876 mask_arg = var;
2877 if (!useless_type_conversion_p (real_masktype, utype))
2879 gcc_assert (TYPE_PRECISION (utype)
2880 <= TYPE_PRECISION (real_masktype));
2881 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2882 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2883 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2884 mask_arg = var;
2886 src_op = build_zero_cst (srctype);
2888 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2889 mask_arg, scale);
2891 stmt_vec_info new_stmt_info;
2892 if (!useless_type_conversion_p (vectype, rettype))
2894 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2895 TYPE_VECTOR_SUBPARTS (rettype)));
2896 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2897 gimple_call_set_lhs (new_call, op);
2898 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2899 var = make_ssa_name (vec_dest);
2900 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2901 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2902 new_stmt_info
2903 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2905 else
2907 var = make_ssa_name (vec_dest, new_call);
2908 gimple_call_set_lhs (new_call, var);
2909 new_stmt_info
2910 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2913 if (modifier == NARROW)
2915 if ((j & 1) == 0)
2917 prev_res = var;
2918 continue;
2920 var = permute_vec_elements (prev_res, var, perm_mask,
2921 stmt_info, gsi);
2922 new_stmt_info = loop_vinfo->lookup_def (var);
2925 if (prev_stmt_info == NULL)
2926 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2927 else
2928 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2929 prev_stmt_info = new_stmt_info;
2933 /* Prepare the base and offset in GS_INFO for vectorization.
2934 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2935 to the vectorized offset argument for the first copy of STMT_INFO.
2936 STMT_INFO is the statement described by GS_INFO and LOOP is the
2937 containing loop. */
2939 static void
2940 vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
2941 gather_scatter_info *gs_info,
2942 tree *dataref_ptr, tree *vec_offset)
2944 gimple_seq stmts = NULL;
2945 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2946 if (stmts != NULL)
2948 basic_block new_bb;
2949 edge pe = loop_preheader_edge (loop);
2950 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2951 gcc_assert (!new_bb);
2953 tree offset_type = TREE_TYPE (gs_info->offset);
2954 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2955 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2956 offset_vectype);
2959 /* Prepare to implement a grouped or strided load or store using
2960 the gather load or scatter store operation described by GS_INFO.
2961 STMT_INFO is the load or store statement.
2963 Set *DATAREF_BUMP to the amount that should be added to the base
2964 address after each copy of the vectorized statement. Set *VEC_OFFSET
2965 to an invariant offset vector in which element I has the value
2966 I * DR_STEP / SCALE. */
2968 static void
2969 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2970 loop_vec_info loop_vinfo,
2971 gather_scatter_info *gs_info,
2972 tree *dataref_bump, tree *vec_offset)
2974 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2975 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2976 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2977 gimple_seq stmts;
2979 tree bump = size_binop (MULT_EXPR,
2980 fold_convert (sizetype, DR_STEP (dr)),
2981 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2982 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2983 if (stmts)
2984 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2986 /* The offset given in GS_INFO can have pointer type, so use the element
2987 type of the vector instead. */
2988 tree offset_type = TREE_TYPE (gs_info->offset);
2989 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2990 offset_type = TREE_TYPE (offset_vectype);
2992 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2993 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2994 ssize_int (gs_info->scale));
2995 step = fold_convert (offset_type, step);
2996 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2998 /* Create {0, X, X*2, X*3, ...}. */
2999 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
3000 build_zero_cst (offset_type), step);
3001 if (stmts)
3002 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3005 /* Return the amount that should be added to a vector pointer to move
3006 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3007 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3008 vectorization. */
3010 static tree
3011 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3012 vect_memory_access_type memory_access_type)
3014 if (memory_access_type == VMAT_INVARIANT)
3015 return size_zero_node;
3017 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3018 tree step = vect_dr_behavior (dr_info)->step;
3019 if (tree_int_cst_sgn (step) == -1)
3020 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3021 return iv_step;
3024 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3026 static bool
3027 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3028 stmt_vec_info *vec_stmt, slp_tree slp_node,
3029 tree vectype_in, stmt_vector_for_cost *cost_vec)
3031 tree op, vectype;
3032 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3033 vec_info *vinfo = stmt_info->vinfo;
3034 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3035 unsigned ncopies;
3037 op = gimple_call_arg (stmt, 0);
3038 vectype = STMT_VINFO_VECTYPE (stmt_info);
3039 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3041 /* Multiple types in SLP are handled by creating the appropriate number of
3042 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3043 case of SLP. */
3044 if (slp_node)
3045 ncopies = 1;
3046 else
3047 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3049 gcc_assert (ncopies >= 1);
3051 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3052 if (! char_vectype)
3053 return false;
3055 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3056 unsigned word_bytes;
3057 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3058 return false;
3060 /* The encoding uses one stepped pattern for each byte in the word. */
3061 vec_perm_builder elts (num_bytes, word_bytes, 3);
3062 for (unsigned i = 0; i < 3; ++i)
3063 for (unsigned j = 0; j < word_bytes; ++j)
3064 elts.quick_push ((i + 1) * word_bytes - j - 1);
3066 vec_perm_indices indices (elts, 1, num_bytes);
3067 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3068 return false;
3070 if (! vec_stmt)
3072 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3073 DUMP_VECT_SCOPE ("vectorizable_bswap");
3074 if (! slp_node)
3076 record_stmt_cost (cost_vec,
3077 1, vector_stmt, stmt_info, 0, vect_prologue);
3078 record_stmt_cost (cost_vec,
3079 ncopies, vec_perm, stmt_info, 0, vect_body);
3081 return true;
3084 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3086 /* Transform. */
3087 vec<tree> vec_oprnds = vNULL;
3088 stmt_vec_info new_stmt_info = NULL;
3089 stmt_vec_info prev_stmt_info = NULL;
3090 for (unsigned j = 0; j < ncopies; j++)
3092 /* Handle uses. */
3093 if (j == 0)
3094 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3095 else
3096 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3098 /* Arguments are ready. create the new vector stmt. */
3099 unsigned i;
3100 tree vop;
3101 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3103 gimple *new_stmt;
3104 tree tem = make_ssa_name (char_vectype);
3105 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3106 char_vectype, vop));
3107 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3108 tree tem2 = make_ssa_name (char_vectype);
3109 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3110 tem, tem, bswap_vconst);
3111 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3112 tem = make_ssa_name (vectype);
3113 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3114 vectype, tem2));
3115 new_stmt_info
3116 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3117 if (slp_node)
3118 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3121 if (slp_node)
3122 continue;
3124 if (j == 0)
3125 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3126 else
3127 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3129 prev_stmt_info = new_stmt_info;
3132 vec_oprnds.release ();
3133 return true;
3136 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3137 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3138 in a single step. On success, store the binary pack code in
3139 *CONVERT_CODE. */
3141 static bool
3142 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3143 tree_code *convert_code)
3145 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3146 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3147 return false;
3149 tree_code code;
3150 int multi_step_cvt = 0;
3151 auto_vec <tree, 8> interm_types;
3152 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3153 &code, &multi_step_cvt,
3154 &interm_types)
3155 || multi_step_cvt)
3156 return false;
3158 *convert_code = code;
3159 return true;
3162 /* Function vectorizable_call.
3164 Check if STMT_INFO performs a function call that can be vectorized.
3165 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3166 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3167 Return true if STMT_INFO is vectorizable in this way. */
3169 static bool
3170 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3171 stmt_vec_info *vec_stmt, slp_tree slp_node,
3172 stmt_vector_for_cost *cost_vec)
3174 gcall *stmt;
3175 tree vec_dest;
3176 tree scalar_dest;
3177 tree op;
3178 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3179 stmt_vec_info prev_stmt_info;
3180 tree vectype_out, vectype_in;
3181 poly_uint64 nunits_in;
3182 poly_uint64 nunits_out;
3183 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3184 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3185 vec_info *vinfo = stmt_info->vinfo;
3186 tree fndecl, new_temp, rhs_type;
3187 enum vect_def_type dt[4]
3188 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3189 vect_unknown_def_type };
3190 tree vectypes[ARRAY_SIZE (dt)] = {};
3191 int ndts = ARRAY_SIZE (dt);
3192 int ncopies, j;
3193 auto_vec<tree, 8> vargs;
3194 auto_vec<tree, 8> orig_vargs;
3195 enum { NARROW, NONE, WIDEN } modifier;
3196 size_t i, nargs;
3197 tree lhs;
3199 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3200 return false;
3202 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3203 && ! vec_stmt)
3204 return false;
3206 /* Is STMT_INFO a vectorizable call? */
3207 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3208 if (!stmt)
3209 return false;
3211 if (gimple_call_internal_p (stmt)
3212 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3213 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3214 /* Handled by vectorizable_load and vectorizable_store. */
3215 return false;
3217 if (gimple_call_lhs (stmt) == NULL_TREE
3218 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3219 return false;
3221 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3223 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3225 /* Process function arguments. */
3226 rhs_type = NULL_TREE;
3227 vectype_in = NULL_TREE;
3228 nargs = gimple_call_num_args (stmt);
3230 /* Bail out if the function has more than three arguments, we do not have
3231 interesting builtin functions to vectorize with more than two arguments
3232 except for fma. No arguments is also not good. */
3233 if (nargs == 0 || nargs > 4)
3234 return false;
3236 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3237 combined_fn cfn = gimple_call_combined_fn (stmt);
3238 if (cfn == CFN_GOMP_SIMD_LANE)
3240 nargs = 0;
3241 rhs_type = unsigned_type_node;
3244 int mask_opno = -1;
3245 if (internal_fn_p (cfn))
3246 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3248 for (i = 0; i < nargs; i++)
3250 op = gimple_call_arg (stmt, i);
3251 if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3253 if (dump_enabled_p ())
3254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3255 "use not simple.\n");
3256 return false;
3259 /* Skip the mask argument to an internal function. This operand
3260 has been converted via a pattern if necessary. */
3261 if ((int) i == mask_opno)
3262 continue;
3264 /* We can only handle calls with arguments of the same type. */
3265 if (rhs_type
3266 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3268 if (dump_enabled_p ())
3269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3270 "argument types differ.\n");
3271 return false;
3273 if (!rhs_type)
3274 rhs_type = TREE_TYPE (op);
3276 if (!vectype_in)
3277 vectype_in = vectypes[i];
3278 else if (vectypes[i]
3279 && vectypes[i] != vectype_in)
3281 if (dump_enabled_p ())
3282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3283 "argument vector types differ.\n");
3284 return false;
3287 /* If all arguments are external or constant defs use a vector type with
3288 the same size as the output vector type. */
3289 if (!vectype_in)
3290 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3291 if (vec_stmt)
3292 gcc_assert (vectype_in);
3293 if (!vectype_in)
3295 if (dump_enabled_p ())
3296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3297 "no vectype for scalar type %T\n", rhs_type);
3299 return false;
3302 /* FORNOW */
3303 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3304 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3305 if (known_eq (nunits_in * 2, nunits_out))
3306 modifier = NARROW;
3307 else if (known_eq (nunits_out, nunits_in))
3308 modifier = NONE;
3309 else if (known_eq (nunits_out * 2, nunits_in))
3310 modifier = WIDEN;
3311 else
3312 return false;
3314 /* We only handle functions that do not read or clobber memory. */
3315 if (gimple_vuse (stmt))
3317 if (dump_enabled_p ())
3318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3319 "function reads from or writes to memory.\n");
3320 return false;
3323 /* For now, we only vectorize functions if a target specific builtin
3324 is available. TODO -- in some cases, it might be profitable to
3325 insert the calls for pieces of the vector, in order to be able
3326 to vectorize other operations in the loop. */
3327 fndecl = NULL_TREE;
3328 internal_fn ifn = IFN_LAST;
3329 tree callee = gimple_call_fndecl (stmt);
3331 /* First try using an internal function. */
3332 tree_code convert_code = ERROR_MARK;
3333 if (cfn != CFN_LAST
3334 && (modifier == NONE
3335 || (modifier == NARROW
3336 && simple_integer_narrowing (vectype_out, vectype_in,
3337 &convert_code))))
3338 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3339 vectype_in);
3341 /* If that fails, try asking for a target-specific built-in function. */
3342 if (ifn == IFN_LAST)
3344 if (cfn != CFN_LAST)
3345 fndecl = targetm.vectorize.builtin_vectorized_function
3346 (cfn, vectype_out, vectype_in);
3347 else if (callee)
3348 fndecl = targetm.vectorize.builtin_md_vectorized_function
3349 (callee, vectype_out, vectype_in);
3352 if (ifn == IFN_LAST && !fndecl)
3354 if (cfn == CFN_GOMP_SIMD_LANE
3355 && !slp_node
3356 && loop_vinfo
3357 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3358 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3359 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3360 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3362 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3363 { 0, 1, 2, ... vf - 1 } vector. */
3364 gcc_assert (nargs == 0);
3366 else if (modifier == NONE
3367 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3368 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3369 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3370 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3371 vectype_in, cost_vec);
3372 else
3374 if (dump_enabled_p ())
3375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3376 "function is not vectorizable.\n");
3377 return false;
3381 if (slp_node)
3382 ncopies = 1;
3383 else if (modifier == NARROW && ifn == IFN_LAST)
3384 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3385 else
3386 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3388 /* Sanity check: make sure that at least one copy of the vectorized stmt
3389 needs to be generated. */
3390 gcc_assert (ncopies >= 1);
3392 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3393 if (!vec_stmt) /* transformation not required. */
3395 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3396 DUMP_VECT_SCOPE ("vectorizable_call");
3397 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3398 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3399 record_stmt_cost (cost_vec, ncopies / 2,
3400 vec_promote_demote, stmt_info, 0, vect_body);
3402 if (loop_vinfo && mask_opno >= 0)
3404 unsigned int nvectors = (slp_node
3405 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3406 : ncopies);
3407 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3409 return true;
3412 /* Transform. */
3414 if (dump_enabled_p ())
3415 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3417 /* Handle def. */
3418 scalar_dest = gimple_call_lhs (stmt);
3419 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3421 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3423 stmt_vec_info new_stmt_info = NULL;
3424 prev_stmt_info = NULL;
3425 if (modifier == NONE || ifn != IFN_LAST)
3427 tree prev_res = NULL_TREE;
3428 vargs.safe_grow (nargs);
3429 orig_vargs.safe_grow (nargs);
3430 for (j = 0; j < ncopies; ++j)
3432 /* Build argument list for the vectorized call. */
3433 if (slp_node)
3435 auto_vec<vec<tree> > vec_defs (nargs);
3436 vec<tree> vec_oprnds0;
3438 for (i = 0; i < nargs; i++)
3439 vargs[i] = gimple_call_arg (stmt, i);
3440 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3441 vec_oprnds0 = vec_defs[0];
3443 /* Arguments are ready. Create the new vector stmt. */
3444 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3446 size_t k;
3447 for (k = 0; k < nargs; k++)
3449 vec<tree> vec_oprndsk = vec_defs[k];
3450 vargs[k] = vec_oprndsk[i];
3452 if (modifier == NARROW)
3454 /* We don't define any narrowing conditional functions
3455 at present. */
3456 gcc_assert (mask_opno < 0);
3457 tree half_res = make_ssa_name (vectype_in);
3458 gcall *call
3459 = gimple_build_call_internal_vec (ifn, vargs);
3460 gimple_call_set_lhs (call, half_res);
3461 gimple_call_set_nothrow (call, true);
3462 new_stmt_info
3463 = vect_finish_stmt_generation (stmt_info, call, gsi);
3464 if ((i & 1) == 0)
3466 prev_res = half_res;
3467 continue;
3469 new_temp = make_ssa_name (vec_dest);
3470 gimple *new_stmt
3471 = gimple_build_assign (new_temp, convert_code,
3472 prev_res, half_res);
3473 new_stmt_info
3474 = vect_finish_stmt_generation (stmt_info, new_stmt,
3475 gsi);
3477 else
3479 if (mask_opno >= 0 && masked_loop_p)
3481 unsigned int vec_num = vec_oprnds0.length ();
3482 /* Always true for SLP. */
3483 gcc_assert (ncopies == 1);
3484 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3485 vectype_out, i);
3486 vargs[mask_opno] = prepare_load_store_mask
3487 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3490 gcall *call;
3491 if (ifn != IFN_LAST)
3492 call = gimple_build_call_internal_vec (ifn, vargs);
3493 else
3494 call = gimple_build_call_vec (fndecl, vargs);
3495 new_temp = make_ssa_name (vec_dest, call);
3496 gimple_call_set_lhs (call, new_temp);
3497 gimple_call_set_nothrow (call, true);
3498 new_stmt_info
3499 = vect_finish_stmt_generation (stmt_info, call, gsi);
3501 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3504 for (i = 0; i < nargs; i++)
3506 vec<tree> vec_oprndsi = vec_defs[i];
3507 vec_oprndsi.release ();
3509 continue;
3512 if (mask_opno >= 0 && !vectypes[mask_opno])
3514 gcc_assert (modifier != WIDEN);
3515 vectypes[mask_opno]
3516 = build_same_sized_truth_vector_type (vectype_in);
3519 for (i = 0; i < nargs; i++)
3521 op = gimple_call_arg (stmt, i);
3522 if (j == 0)
3523 vec_oprnd0
3524 = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3525 else
3526 vec_oprnd0
3527 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3529 orig_vargs[i] = vargs[i] = vec_oprnd0;
3532 if (mask_opno >= 0 && masked_loop_p)
3534 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3535 vectype_out, j);
3536 vargs[mask_opno]
3537 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3538 vargs[mask_opno], gsi);
3541 if (cfn == CFN_GOMP_SIMD_LANE)
3543 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3544 tree new_var
3545 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3546 gimple *init_stmt = gimple_build_assign (new_var, cst);
3547 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3548 new_temp = make_ssa_name (vec_dest);
3549 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3550 new_stmt_info
3551 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3553 else if (modifier == NARROW)
3555 /* We don't define any narrowing conditional functions at
3556 present. */
3557 gcc_assert (mask_opno < 0);
3558 tree half_res = make_ssa_name (vectype_in);
3559 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3560 gimple_call_set_lhs (call, half_res);
3561 gimple_call_set_nothrow (call, true);
3562 new_stmt_info
3563 = vect_finish_stmt_generation (stmt_info, call, gsi);
3564 if ((j & 1) == 0)
3566 prev_res = half_res;
3567 continue;
3569 new_temp = make_ssa_name (vec_dest);
3570 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3571 prev_res, half_res);
3572 new_stmt_info
3573 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3575 else
3577 gcall *call;
3578 if (ifn != IFN_LAST)
3579 call = gimple_build_call_internal_vec (ifn, vargs);
3580 else
3581 call = gimple_build_call_vec (fndecl, vargs);
3582 new_temp = make_ssa_name (vec_dest, call);
3583 gimple_call_set_lhs (call, new_temp);
3584 gimple_call_set_nothrow (call, true);
3585 new_stmt_info
3586 = vect_finish_stmt_generation (stmt_info, call, gsi);
3589 if (j == (modifier == NARROW ? 1 : 0))
3590 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3591 else
3592 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3594 prev_stmt_info = new_stmt_info;
3597 else if (modifier == NARROW)
3599 /* We don't define any narrowing conditional functions at present. */
3600 gcc_assert (mask_opno < 0);
3601 for (j = 0; j < ncopies; ++j)
3603 /* Build argument list for the vectorized call. */
3604 if (j == 0)
3605 vargs.create (nargs * 2);
3606 else
3607 vargs.truncate (0);
3609 if (slp_node)
3611 auto_vec<vec<tree> > vec_defs (nargs);
3612 vec<tree> vec_oprnds0;
3614 for (i = 0; i < nargs; i++)
3615 vargs.quick_push (gimple_call_arg (stmt, i));
3616 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3617 vec_oprnds0 = vec_defs[0];
3619 /* Arguments are ready. Create the new vector stmt. */
3620 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3622 size_t k;
3623 vargs.truncate (0);
3624 for (k = 0; k < nargs; k++)
3626 vec<tree> vec_oprndsk = vec_defs[k];
3627 vargs.quick_push (vec_oprndsk[i]);
3628 vargs.quick_push (vec_oprndsk[i + 1]);
3630 gcall *call;
3631 if (ifn != IFN_LAST)
3632 call = gimple_build_call_internal_vec (ifn, vargs);
3633 else
3634 call = gimple_build_call_vec (fndecl, vargs);
3635 new_temp = make_ssa_name (vec_dest, call);
3636 gimple_call_set_lhs (call, new_temp);
3637 gimple_call_set_nothrow (call, true);
3638 new_stmt_info
3639 = vect_finish_stmt_generation (stmt_info, call, gsi);
3640 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3643 for (i = 0; i < nargs; i++)
3645 vec<tree> vec_oprndsi = vec_defs[i];
3646 vec_oprndsi.release ();
3648 continue;
3651 for (i = 0; i < nargs; i++)
3653 op = gimple_call_arg (stmt, i);
3654 if (j == 0)
3656 vec_oprnd0
3657 = vect_get_vec_def_for_operand (op, stmt_info,
3658 vectypes[i]);
3659 vec_oprnd1
3660 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3662 else
3664 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3665 2 * i + 1);
3666 vec_oprnd0
3667 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3668 vec_oprnd1
3669 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3672 vargs.quick_push (vec_oprnd0);
3673 vargs.quick_push (vec_oprnd1);
3676 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3677 new_temp = make_ssa_name (vec_dest, new_stmt);
3678 gimple_call_set_lhs (new_stmt, new_temp);
3679 new_stmt_info
3680 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3682 if (j == 0)
3683 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3684 else
3685 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3687 prev_stmt_info = new_stmt_info;
3690 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3692 else
3693 /* No current target implements this case. */
3694 return false;
3696 vargs.release ();
3698 /* The call in STMT might prevent it from being removed in dce.
3699 We however cannot remove it here, due to the way the ssa name
3700 it defines is mapped to the new definition. So just replace
3701 rhs of the statement with something harmless. */
3703 if (slp_node)
3704 return true;
3706 stmt_info = vect_orig_stmt (stmt_info);
3707 lhs = gimple_get_lhs (stmt_info->stmt);
3709 gassign *new_stmt
3710 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3711 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3713 return true;
3717 struct simd_call_arg_info
3719 tree vectype;
3720 tree op;
3721 HOST_WIDE_INT linear_step;
3722 enum vect_def_type dt;
3723 unsigned int align;
3724 bool simd_lane_linear;
3727 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3728 is linear within simd lane (but not within whole loop), note it in
3729 *ARGINFO. */
3731 static void
3732 vect_simd_lane_linear (tree op, struct loop *loop,
3733 struct simd_call_arg_info *arginfo)
3735 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3737 if (!is_gimple_assign (def_stmt)
3738 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3739 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3740 return;
3742 tree base = gimple_assign_rhs1 (def_stmt);
3743 HOST_WIDE_INT linear_step = 0;
3744 tree v = gimple_assign_rhs2 (def_stmt);
3745 while (TREE_CODE (v) == SSA_NAME)
3747 tree t;
3748 def_stmt = SSA_NAME_DEF_STMT (v);
3749 if (is_gimple_assign (def_stmt))
3750 switch (gimple_assign_rhs_code (def_stmt))
3752 case PLUS_EXPR:
3753 t = gimple_assign_rhs2 (def_stmt);
3754 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3755 return;
3756 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3757 v = gimple_assign_rhs1 (def_stmt);
3758 continue;
3759 case MULT_EXPR:
3760 t = gimple_assign_rhs2 (def_stmt);
3761 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3762 return;
3763 linear_step = tree_to_shwi (t);
3764 v = gimple_assign_rhs1 (def_stmt);
3765 continue;
3766 CASE_CONVERT:
3767 t = gimple_assign_rhs1 (def_stmt);
3768 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3769 || (TYPE_PRECISION (TREE_TYPE (v))
3770 < TYPE_PRECISION (TREE_TYPE (t))))
3771 return;
3772 if (!linear_step)
3773 linear_step = 1;
3774 v = t;
3775 continue;
3776 default:
3777 return;
3779 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3780 && loop->simduid
3781 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3782 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3783 == loop->simduid))
3785 if (!linear_step)
3786 linear_step = 1;
3787 arginfo->linear_step = linear_step;
3788 arginfo->op = base;
3789 arginfo->simd_lane_linear = true;
3790 return;
3795 /* Return the number of elements in vector type VECTYPE, which is associated
3796 with a SIMD clone. At present these vectors always have a constant
3797 length. */
3799 static unsigned HOST_WIDE_INT
3800 simd_clone_subparts (tree vectype)
3802 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3805 /* Function vectorizable_simd_clone_call.
3807 Check if STMT_INFO performs a function call that can be vectorized
3808 by calling a simd clone of the function.
3809 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3810 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3811 Return true if STMT_INFO is vectorizable in this way. */
3813 static bool
3814 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3815 gimple_stmt_iterator *gsi,
3816 stmt_vec_info *vec_stmt, slp_tree slp_node,
3817 stmt_vector_for_cost *)
3819 tree vec_dest;
3820 tree scalar_dest;
3821 tree op, type;
3822 tree vec_oprnd0 = NULL_TREE;
3823 stmt_vec_info prev_stmt_info;
3824 tree vectype;
3825 unsigned int nunits;
3826 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3827 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3828 vec_info *vinfo = stmt_info->vinfo;
3829 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3830 tree fndecl, new_temp;
3831 int ncopies, j;
3832 auto_vec<simd_call_arg_info> arginfo;
3833 vec<tree> vargs = vNULL;
3834 size_t i, nargs;
3835 tree lhs, rtype, ratype;
3836 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3838 /* Is STMT a vectorizable call? */
3839 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3840 if (!stmt)
3841 return false;
3843 fndecl = gimple_call_fndecl (stmt);
3844 if (fndecl == NULL_TREE)
3845 return false;
3847 struct cgraph_node *node = cgraph_node::get (fndecl);
3848 if (node == NULL || node->simd_clones == NULL)
3849 return false;
3851 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3852 return false;
3854 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3855 && ! vec_stmt)
3856 return false;
3858 if (gimple_call_lhs (stmt)
3859 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3860 return false;
3862 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3864 vectype = STMT_VINFO_VECTYPE (stmt_info);
3866 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3867 return false;
3869 /* FORNOW */
3870 if (slp_node)
3871 return false;
3873 /* Process function arguments. */
3874 nargs = gimple_call_num_args (stmt);
3876 /* Bail out if the function has zero arguments. */
3877 if (nargs == 0)
3878 return false;
3880 arginfo.reserve (nargs, true);
3882 for (i = 0; i < nargs; i++)
3884 simd_call_arg_info thisarginfo;
3885 affine_iv iv;
3887 thisarginfo.linear_step = 0;
3888 thisarginfo.align = 0;
3889 thisarginfo.op = NULL_TREE;
3890 thisarginfo.simd_lane_linear = false;
3892 op = gimple_call_arg (stmt, i);
3893 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3894 &thisarginfo.vectype)
3895 || thisarginfo.dt == vect_uninitialized_def)
3897 if (dump_enabled_p ())
3898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3899 "use not simple.\n");
3900 return false;
3903 if (thisarginfo.dt == vect_constant_def
3904 || thisarginfo.dt == vect_external_def)
3905 gcc_assert (thisarginfo.vectype == NULL_TREE);
3906 else
3907 gcc_assert (thisarginfo.vectype != NULL_TREE);
3909 /* For linear arguments, the analyze phase should have saved
3910 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3911 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3912 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3914 gcc_assert (vec_stmt);
3915 thisarginfo.linear_step
3916 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3917 thisarginfo.op
3918 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3919 thisarginfo.simd_lane_linear
3920 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3921 == boolean_true_node);
3922 /* If loop has been peeled for alignment, we need to adjust it. */
3923 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3924 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3925 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3927 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3928 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3929 tree opt = TREE_TYPE (thisarginfo.op);
3930 bias = fold_convert (TREE_TYPE (step), bias);
3931 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3932 thisarginfo.op
3933 = fold_build2 (POINTER_TYPE_P (opt)
3934 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3935 thisarginfo.op, bias);
3938 else if (!vec_stmt
3939 && thisarginfo.dt != vect_constant_def
3940 && thisarginfo.dt != vect_external_def
3941 && loop_vinfo
3942 && TREE_CODE (op) == SSA_NAME
3943 && simple_iv (loop, loop_containing_stmt (stmt), op,
3944 &iv, false)
3945 && tree_fits_shwi_p (iv.step))
3947 thisarginfo.linear_step = tree_to_shwi (iv.step);
3948 thisarginfo.op = iv.base;
3950 else if ((thisarginfo.dt == vect_constant_def
3951 || thisarginfo.dt == vect_external_def)
3952 && POINTER_TYPE_P (TREE_TYPE (op)))
3953 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3954 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3955 linear too. */
3956 if (POINTER_TYPE_P (TREE_TYPE (op))
3957 && !thisarginfo.linear_step
3958 && !vec_stmt
3959 && thisarginfo.dt != vect_constant_def
3960 && thisarginfo.dt != vect_external_def
3961 && loop_vinfo
3962 && !slp_node
3963 && TREE_CODE (op) == SSA_NAME)
3964 vect_simd_lane_linear (op, loop, &thisarginfo);
3966 arginfo.quick_push (thisarginfo);
3969 unsigned HOST_WIDE_INT vf;
3970 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3972 if (dump_enabled_p ())
3973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3974 "not considering SIMD clones; not yet supported"
3975 " for variable-width vectors.\n");
3976 return false;
3979 unsigned int badness = 0;
3980 struct cgraph_node *bestn = NULL;
3981 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3982 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3983 else
3984 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3985 n = n->simdclone->next_clone)
3987 unsigned int this_badness = 0;
3988 if (n->simdclone->simdlen > vf
3989 || n->simdclone->nargs != nargs)
3990 continue;
3991 if (n->simdclone->simdlen < vf)
3992 this_badness += (exact_log2 (vf)
3993 - exact_log2 (n->simdclone->simdlen)) * 1024;
3994 if (n->simdclone->inbranch)
3995 this_badness += 2048;
3996 int target_badness = targetm.simd_clone.usable (n);
3997 if (target_badness < 0)
3998 continue;
3999 this_badness += target_badness * 512;
4000 /* FORNOW: Have to add code to add the mask argument. */
4001 if (n->simdclone->inbranch)
4002 continue;
4003 for (i = 0; i < nargs; i++)
4005 switch (n->simdclone->args[i].arg_type)
4007 case SIMD_CLONE_ARG_TYPE_VECTOR:
4008 if (!useless_type_conversion_p
4009 (n->simdclone->args[i].orig_type,
4010 TREE_TYPE (gimple_call_arg (stmt, i))))
4011 i = -1;
4012 else if (arginfo[i].dt == vect_constant_def
4013 || arginfo[i].dt == vect_external_def
4014 || arginfo[i].linear_step)
4015 this_badness += 64;
4016 break;
4017 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4018 if (arginfo[i].dt != vect_constant_def
4019 && arginfo[i].dt != vect_external_def)
4020 i = -1;
4021 break;
4022 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4023 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4024 if (arginfo[i].dt == vect_constant_def
4025 || arginfo[i].dt == vect_external_def
4026 || (arginfo[i].linear_step
4027 != n->simdclone->args[i].linear_step))
4028 i = -1;
4029 break;
4030 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4031 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4032 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4033 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4034 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4035 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4036 /* FORNOW */
4037 i = -1;
4038 break;
4039 case SIMD_CLONE_ARG_TYPE_MASK:
4040 gcc_unreachable ();
4042 if (i == (size_t) -1)
4043 break;
4044 if (n->simdclone->args[i].alignment > arginfo[i].align)
4046 i = -1;
4047 break;
4049 if (arginfo[i].align)
4050 this_badness += (exact_log2 (arginfo[i].align)
4051 - exact_log2 (n->simdclone->args[i].alignment));
4053 if (i == (size_t) -1)
4054 continue;
4055 if (bestn == NULL || this_badness < badness)
4057 bestn = n;
4058 badness = this_badness;
4062 if (bestn == NULL)
4063 return false;
4065 for (i = 0; i < nargs; i++)
4066 if ((arginfo[i].dt == vect_constant_def
4067 || arginfo[i].dt == vect_external_def)
4068 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4070 arginfo[i].vectype
4071 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
4072 i)));
4073 if (arginfo[i].vectype == NULL
4074 || (simd_clone_subparts (arginfo[i].vectype)
4075 > bestn->simdclone->simdlen))
4076 return false;
4079 fndecl = bestn->decl;
4080 nunits = bestn->simdclone->simdlen;
4081 ncopies = vf / nunits;
4083 /* If the function isn't const, only allow it in simd loops where user
4084 has asserted that at least nunits consecutive iterations can be
4085 performed using SIMD instructions. */
4086 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4087 && gimple_vuse (stmt))
4088 return false;
4090 /* Sanity check: make sure that at least one copy of the vectorized stmt
4091 needs to be generated. */
4092 gcc_assert (ncopies >= 1);
4094 if (!vec_stmt) /* transformation not required. */
4096 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4097 for (i = 0; i < nargs; i++)
4098 if ((bestn->simdclone->args[i].arg_type
4099 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4100 || (bestn->simdclone->args[i].arg_type
4101 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4103 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4104 + 1);
4105 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4106 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4107 ? size_type_node : TREE_TYPE (arginfo[i].op);
4108 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4109 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4110 tree sll = arginfo[i].simd_lane_linear
4111 ? boolean_true_node : boolean_false_node;
4112 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4114 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4115 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4116 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4117 return true;
4120 /* Transform. */
4122 if (dump_enabled_p ())
4123 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4125 /* Handle def. */
4126 scalar_dest = gimple_call_lhs (stmt);
4127 vec_dest = NULL_TREE;
4128 rtype = NULL_TREE;
4129 ratype = NULL_TREE;
4130 if (scalar_dest)
4132 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4133 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4134 if (TREE_CODE (rtype) == ARRAY_TYPE)
4136 ratype = rtype;
4137 rtype = TREE_TYPE (ratype);
4141 prev_stmt_info = NULL;
4142 for (j = 0; j < ncopies; ++j)
4144 /* Build argument list for the vectorized call. */
4145 if (j == 0)
4146 vargs.create (nargs);
4147 else
4148 vargs.truncate (0);
4150 for (i = 0; i < nargs; i++)
4152 unsigned int k, l, m, o;
4153 tree atype;
4154 op = gimple_call_arg (stmt, i);
4155 switch (bestn->simdclone->args[i].arg_type)
4157 case SIMD_CLONE_ARG_TYPE_VECTOR:
4158 atype = bestn->simdclone->args[i].vector_type;
4159 o = nunits / simd_clone_subparts (atype);
4160 for (m = j * o; m < (j + 1) * o; m++)
4162 if (simd_clone_subparts (atype)
4163 < simd_clone_subparts (arginfo[i].vectype))
4165 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4166 k = (simd_clone_subparts (arginfo[i].vectype)
4167 / simd_clone_subparts (atype));
4168 gcc_assert ((k & (k - 1)) == 0);
4169 if (m == 0)
4170 vec_oprnd0
4171 = vect_get_vec_def_for_operand (op, stmt_info);
4172 else
4174 vec_oprnd0 = arginfo[i].op;
4175 if ((m & (k - 1)) == 0)
4176 vec_oprnd0
4177 = vect_get_vec_def_for_stmt_copy (vinfo,
4178 vec_oprnd0);
4180 arginfo[i].op = vec_oprnd0;
4181 vec_oprnd0
4182 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4183 bitsize_int (prec),
4184 bitsize_int ((m & (k - 1)) * prec));
4185 gassign *new_stmt
4186 = gimple_build_assign (make_ssa_name (atype),
4187 vec_oprnd0);
4188 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4189 vargs.safe_push (gimple_assign_lhs (new_stmt));
4191 else
4193 k = (simd_clone_subparts (atype)
4194 / simd_clone_subparts (arginfo[i].vectype));
4195 gcc_assert ((k & (k - 1)) == 0);
4196 vec<constructor_elt, va_gc> *ctor_elts;
4197 if (k != 1)
4198 vec_alloc (ctor_elts, k);
4199 else
4200 ctor_elts = NULL;
4201 for (l = 0; l < k; l++)
4203 if (m == 0 && l == 0)
4204 vec_oprnd0
4205 = vect_get_vec_def_for_operand (op, stmt_info);
4206 else
4207 vec_oprnd0
4208 = vect_get_vec_def_for_stmt_copy (vinfo,
4209 arginfo[i].op);
4210 arginfo[i].op = vec_oprnd0;
4211 if (k == 1)
4212 break;
4213 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4214 vec_oprnd0);
4216 if (k == 1)
4217 vargs.safe_push (vec_oprnd0);
4218 else
4220 vec_oprnd0 = build_constructor (atype, ctor_elts);
4221 gassign *new_stmt
4222 = gimple_build_assign (make_ssa_name (atype),
4223 vec_oprnd0);
4224 vect_finish_stmt_generation (stmt_info, new_stmt,
4225 gsi);
4226 vargs.safe_push (gimple_assign_lhs (new_stmt));
4230 break;
4231 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4232 vargs.safe_push (op);
4233 break;
4234 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4235 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4236 if (j == 0)
4238 gimple_seq stmts;
4239 arginfo[i].op
4240 = force_gimple_operand (arginfo[i].op, &stmts, true,
4241 NULL_TREE);
4242 if (stmts != NULL)
4244 basic_block new_bb;
4245 edge pe = loop_preheader_edge (loop);
4246 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4247 gcc_assert (!new_bb);
4249 if (arginfo[i].simd_lane_linear)
4251 vargs.safe_push (arginfo[i].op);
4252 break;
4254 tree phi_res = copy_ssa_name (op);
4255 gphi *new_phi = create_phi_node (phi_res, loop->header);
4256 loop_vinfo->add_stmt (new_phi);
4257 add_phi_arg (new_phi, arginfo[i].op,
4258 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4259 enum tree_code code
4260 = POINTER_TYPE_P (TREE_TYPE (op))
4261 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4262 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4263 ? sizetype : TREE_TYPE (op);
4264 widest_int cst
4265 = wi::mul (bestn->simdclone->args[i].linear_step,
4266 ncopies * nunits);
4267 tree tcst = wide_int_to_tree (type, cst);
4268 tree phi_arg = copy_ssa_name (op);
4269 gassign *new_stmt
4270 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4271 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4272 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4273 loop_vinfo->add_stmt (new_stmt);
4274 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4275 UNKNOWN_LOCATION);
4276 arginfo[i].op = phi_res;
4277 vargs.safe_push (phi_res);
4279 else
4281 enum tree_code code
4282 = POINTER_TYPE_P (TREE_TYPE (op))
4283 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4284 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4285 ? sizetype : TREE_TYPE (op);
4286 widest_int cst
4287 = wi::mul (bestn->simdclone->args[i].linear_step,
4288 j * nunits);
4289 tree tcst = wide_int_to_tree (type, cst);
4290 new_temp = make_ssa_name (TREE_TYPE (op));
4291 gassign *new_stmt
4292 = gimple_build_assign (new_temp, code,
4293 arginfo[i].op, tcst);
4294 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4295 vargs.safe_push (new_temp);
4297 break;
4298 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4299 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4300 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4301 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4302 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4303 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4304 default:
4305 gcc_unreachable ();
4309 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4310 if (vec_dest)
4312 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4313 if (ratype)
4314 new_temp = create_tmp_var (ratype);
4315 else if (simd_clone_subparts (vectype)
4316 == simd_clone_subparts (rtype))
4317 new_temp = make_ssa_name (vec_dest, new_call);
4318 else
4319 new_temp = make_ssa_name (rtype, new_call);
4320 gimple_call_set_lhs (new_call, new_temp);
4322 stmt_vec_info new_stmt_info
4323 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4325 if (vec_dest)
4327 if (simd_clone_subparts (vectype) < nunits)
4329 unsigned int k, l;
4330 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4331 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4332 k = nunits / simd_clone_subparts (vectype);
4333 gcc_assert ((k & (k - 1)) == 0);
4334 for (l = 0; l < k; l++)
4336 tree t;
4337 if (ratype)
4339 t = build_fold_addr_expr (new_temp);
4340 t = build2 (MEM_REF, vectype, t,
4341 build_int_cst (TREE_TYPE (t), l * bytes));
4343 else
4344 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4345 bitsize_int (prec), bitsize_int (l * prec));
4346 gimple *new_stmt
4347 = gimple_build_assign (make_ssa_name (vectype), t);
4348 new_stmt_info
4349 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4351 if (j == 0 && l == 0)
4352 STMT_VINFO_VEC_STMT (stmt_info)
4353 = *vec_stmt = new_stmt_info;
4354 else
4355 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4357 prev_stmt_info = new_stmt_info;
4360 if (ratype)
4361 vect_clobber_variable (stmt_info, gsi, new_temp);
4362 continue;
4364 else if (simd_clone_subparts (vectype) > nunits)
4366 unsigned int k = (simd_clone_subparts (vectype)
4367 / simd_clone_subparts (rtype));
4368 gcc_assert ((k & (k - 1)) == 0);
4369 if ((j & (k - 1)) == 0)
4370 vec_alloc (ret_ctor_elts, k);
4371 if (ratype)
4373 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4374 for (m = 0; m < o; m++)
4376 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4377 size_int (m), NULL_TREE, NULL_TREE);
4378 gimple *new_stmt
4379 = gimple_build_assign (make_ssa_name (rtype), tem);
4380 new_stmt_info
4381 = vect_finish_stmt_generation (stmt_info, new_stmt,
4382 gsi);
4383 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4384 gimple_assign_lhs (new_stmt));
4386 vect_clobber_variable (stmt_info, gsi, new_temp);
4388 else
4389 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4390 if ((j & (k - 1)) != k - 1)
4391 continue;
4392 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4393 gimple *new_stmt
4394 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4395 new_stmt_info
4396 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4398 if ((unsigned) j == k - 1)
4399 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4400 else
4401 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4403 prev_stmt_info = new_stmt_info;
4404 continue;
4406 else if (ratype)
4408 tree t = build_fold_addr_expr (new_temp);
4409 t = build2 (MEM_REF, vectype, t,
4410 build_int_cst (TREE_TYPE (t), 0));
4411 gimple *new_stmt
4412 = gimple_build_assign (make_ssa_name (vec_dest), t);
4413 new_stmt_info
4414 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4415 vect_clobber_variable (stmt_info, gsi, new_temp);
4419 if (j == 0)
4420 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4421 else
4422 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4424 prev_stmt_info = new_stmt_info;
4427 vargs.release ();
4429 /* The call in STMT might prevent it from being removed in dce.
4430 We however cannot remove it here, due to the way the ssa name
4431 it defines is mapped to the new definition. So just replace
4432 rhs of the statement with something harmless. */
4434 if (slp_node)
4435 return true;
4437 gimple *new_stmt;
4438 if (scalar_dest)
4440 type = TREE_TYPE (scalar_dest);
4441 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4442 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4444 else
4445 new_stmt = gimple_build_nop ();
4446 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4447 unlink_stmt_vdef (stmt);
4449 return true;
4453 /* Function vect_gen_widened_results_half
4455 Create a vector stmt whose code, type, number of arguments, and result
4456 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4457 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
4458 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4459 needs to be created (DECL is a function-decl of a target-builtin).
4460 STMT_INFO is the original scalar stmt that we are vectorizing. */
4462 static gimple *
4463 vect_gen_widened_results_half (enum tree_code code,
4464 tree decl,
4465 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4466 tree vec_dest, gimple_stmt_iterator *gsi,
4467 stmt_vec_info stmt_info)
4469 gimple *new_stmt;
4470 tree new_temp;
4472 /* Generate half of the widened result: */
4473 if (code == CALL_EXPR)
4475 /* Target specific support */
4476 if (op_type == binary_op)
4477 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4478 else
4479 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4480 new_temp = make_ssa_name (vec_dest, new_stmt);
4481 gimple_call_set_lhs (new_stmt, new_temp);
4483 else
4485 /* Generic support */
4486 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4487 if (op_type != binary_op)
4488 vec_oprnd1 = NULL;
4489 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4490 new_temp = make_ssa_name (vec_dest, new_stmt);
4491 gimple_assign_set_lhs (new_stmt, new_temp);
4493 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4495 return new_stmt;
4499 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4500 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4501 containing scalar operand), and for the rest we get a copy with
4502 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4503 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4504 The vectors are collected into VEC_OPRNDS. */
4506 static void
4507 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4508 vec<tree> *vec_oprnds, int multi_step_cvt)
4510 vec_info *vinfo = stmt_info->vinfo;
4511 tree vec_oprnd;
4513 /* Get first vector operand. */
4514 /* All the vector operands except the very first one (that is scalar oprnd)
4515 are stmt copies. */
4516 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4517 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4518 else
4519 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4521 vec_oprnds->quick_push (vec_oprnd);
4523 /* Get second vector operand. */
4524 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4525 vec_oprnds->quick_push (vec_oprnd);
4527 *oprnd = vec_oprnd;
4529 /* For conversion in multiple steps, continue to get operands
4530 recursively. */
4531 if (multi_step_cvt)
4532 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4533 multi_step_cvt - 1);
4537 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4538 For multi-step conversions store the resulting vectors and call the function
4539 recursively. */
4541 static void
4542 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4543 int multi_step_cvt,
4544 stmt_vec_info stmt_info,
4545 vec<tree> vec_dsts,
4546 gimple_stmt_iterator *gsi,
4547 slp_tree slp_node, enum tree_code code,
4548 stmt_vec_info *prev_stmt_info)
4550 unsigned int i;
4551 tree vop0, vop1, new_tmp, vec_dest;
4553 vec_dest = vec_dsts.pop ();
4555 for (i = 0; i < vec_oprnds->length (); i += 2)
4557 /* Create demotion operation. */
4558 vop0 = (*vec_oprnds)[i];
4559 vop1 = (*vec_oprnds)[i + 1];
4560 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4561 new_tmp = make_ssa_name (vec_dest, new_stmt);
4562 gimple_assign_set_lhs (new_stmt, new_tmp);
4563 stmt_vec_info new_stmt_info
4564 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4566 if (multi_step_cvt)
4567 /* Store the resulting vector for next recursive call. */
4568 (*vec_oprnds)[i/2] = new_tmp;
4569 else
4571 /* This is the last step of the conversion sequence. Store the
4572 vectors in SLP_NODE or in vector info of the scalar statement
4573 (or in STMT_VINFO_RELATED_STMT chain). */
4574 if (slp_node)
4575 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4576 else
4578 if (!*prev_stmt_info)
4579 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4580 else
4581 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4583 *prev_stmt_info = new_stmt_info;
4588 /* For multi-step demotion operations we first generate demotion operations
4589 from the source type to the intermediate types, and then combine the
4590 results (stored in VEC_OPRNDS) in demotion operation to the destination
4591 type. */
4592 if (multi_step_cvt)
4594 /* At each level of recursion we have half of the operands we had at the
4595 previous level. */
4596 vec_oprnds->truncate ((i+1)/2);
4597 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4598 stmt_info, vec_dsts, gsi,
4599 slp_node, VEC_PACK_TRUNC_EXPR,
4600 prev_stmt_info);
4603 vec_dsts.quick_push (vec_dest);
4607 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4608 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4609 STMT_INFO. For multi-step conversions store the resulting vectors and
4610 call the function recursively. */
4612 static void
4613 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4614 vec<tree> *vec_oprnds1,
4615 stmt_vec_info stmt_info, tree vec_dest,
4616 gimple_stmt_iterator *gsi,
4617 enum tree_code code1,
4618 enum tree_code code2, tree decl1,
4619 tree decl2, int op_type)
4621 int i;
4622 tree vop0, vop1, new_tmp1, new_tmp2;
4623 gimple *new_stmt1, *new_stmt2;
4624 vec<tree> vec_tmp = vNULL;
4626 vec_tmp.create (vec_oprnds0->length () * 2);
4627 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4629 if (op_type == binary_op)
4630 vop1 = (*vec_oprnds1)[i];
4631 else
4632 vop1 = NULL_TREE;
4634 /* Generate the two halves of promotion operation. */
4635 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4636 op_type, vec_dest, gsi,
4637 stmt_info);
4638 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4639 op_type, vec_dest, gsi,
4640 stmt_info);
4641 if (is_gimple_call (new_stmt1))
4643 new_tmp1 = gimple_call_lhs (new_stmt1);
4644 new_tmp2 = gimple_call_lhs (new_stmt2);
4646 else
4648 new_tmp1 = gimple_assign_lhs (new_stmt1);
4649 new_tmp2 = gimple_assign_lhs (new_stmt2);
4652 /* Store the results for the next step. */
4653 vec_tmp.quick_push (new_tmp1);
4654 vec_tmp.quick_push (new_tmp2);
4657 vec_oprnds0->release ();
4658 *vec_oprnds0 = vec_tmp;
4662 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4663 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4664 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4665 Return true if STMT_INFO is vectorizable in this way. */
4667 static bool
4668 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4669 stmt_vec_info *vec_stmt, slp_tree slp_node,
4670 stmt_vector_for_cost *cost_vec)
4672 tree vec_dest;
4673 tree scalar_dest;
4674 tree op0, op1 = NULL_TREE;
4675 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4676 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4677 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4678 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4679 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4680 tree new_temp;
4681 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4682 int ndts = 2;
4683 stmt_vec_info prev_stmt_info;
4684 poly_uint64 nunits_in;
4685 poly_uint64 nunits_out;
4686 tree vectype_out, vectype_in;
4687 int ncopies, i, j;
4688 tree lhs_type, rhs_type;
4689 enum { NARROW, NONE, WIDEN } modifier;
4690 vec<tree> vec_oprnds0 = vNULL;
4691 vec<tree> vec_oprnds1 = vNULL;
4692 tree vop0;
4693 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4694 vec_info *vinfo = stmt_info->vinfo;
4695 int multi_step_cvt = 0;
4696 vec<tree> interm_types = vNULL;
4697 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4698 int op_type;
4699 unsigned short fltsz;
4701 /* Is STMT a vectorizable conversion? */
4703 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4704 return false;
4706 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4707 && ! vec_stmt)
4708 return false;
4710 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4711 if (!stmt)
4712 return false;
4714 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4715 return false;
4717 code = gimple_assign_rhs_code (stmt);
4718 if (!CONVERT_EXPR_CODE_P (code)
4719 && code != FIX_TRUNC_EXPR
4720 && code != FLOAT_EXPR
4721 && code != WIDEN_MULT_EXPR
4722 && code != WIDEN_LSHIFT_EXPR)
4723 return false;
4725 op_type = TREE_CODE_LENGTH (code);
4727 /* Check types of lhs and rhs. */
4728 scalar_dest = gimple_assign_lhs (stmt);
4729 lhs_type = TREE_TYPE (scalar_dest);
4730 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4732 op0 = gimple_assign_rhs1 (stmt);
4733 rhs_type = TREE_TYPE (op0);
4735 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4736 && !((INTEGRAL_TYPE_P (lhs_type)
4737 && INTEGRAL_TYPE_P (rhs_type))
4738 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4739 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4740 return false;
4742 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4743 && ((INTEGRAL_TYPE_P (lhs_type)
4744 && !type_has_mode_precision_p (lhs_type))
4745 || (INTEGRAL_TYPE_P (rhs_type)
4746 && !type_has_mode_precision_p (rhs_type))))
4748 if (dump_enabled_p ())
4749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4750 "type conversion to/from bit-precision unsupported."
4751 "\n");
4752 return false;
4755 /* Check the operands of the operation. */
4756 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4758 if (dump_enabled_p ())
4759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4760 "use not simple.\n");
4761 return false;
4763 if (op_type == binary_op)
4765 bool ok;
4767 op1 = gimple_assign_rhs2 (stmt);
4768 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4769 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4770 OP1. */
4771 if (CONSTANT_CLASS_P (op0))
4772 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4773 else
4774 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4776 if (!ok)
4778 if (dump_enabled_p ())
4779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4780 "use not simple.\n");
4781 return false;
4785 /* If op0 is an external or constant defs use a vector type of
4786 the same size as the output vector type. */
4787 if (!vectype_in)
4788 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4789 if (vec_stmt)
4790 gcc_assert (vectype_in);
4791 if (!vectype_in)
4793 if (dump_enabled_p ())
4794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4795 "no vectype for scalar type %T\n", rhs_type);
4797 return false;
4800 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4801 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4803 if (dump_enabled_p ())
4804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4805 "can't convert between boolean and non "
4806 "boolean vectors %T\n", rhs_type);
4808 return false;
4811 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4812 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4813 if (known_eq (nunits_out, nunits_in))
4814 modifier = NONE;
4815 else if (multiple_p (nunits_out, nunits_in))
4816 modifier = NARROW;
4817 else
4819 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4820 modifier = WIDEN;
4823 /* Multiple types in SLP are handled by creating the appropriate number of
4824 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4825 case of SLP. */
4826 if (slp_node)
4827 ncopies = 1;
4828 else if (modifier == NARROW)
4829 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4830 else
4831 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4833 /* Sanity check: make sure that at least one copy of the vectorized stmt
4834 needs to be generated. */
4835 gcc_assert (ncopies >= 1);
4837 bool found_mode = false;
4838 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4839 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4840 opt_scalar_mode rhs_mode_iter;
4842 /* Supportable by target? */
4843 switch (modifier)
4845 case NONE:
4846 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4847 return false;
4848 if (supportable_convert_operation (code, vectype_out, vectype_in,
4849 &decl1, &code1))
4850 break;
4851 /* FALLTHRU */
4852 unsupported:
4853 if (dump_enabled_p ())
4854 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4855 "conversion not supported by target.\n");
4856 return false;
4858 case WIDEN:
4859 if (supportable_widening_operation (code, stmt_info, vectype_out,
4860 vectype_in, &code1, &code2,
4861 &multi_step_cvt, &interm_types))
4863 /* Binary widening operation can only be supported directly by the
4864 architecture. */
4865 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4866 break;
4869 if (code != FLOAT_EXPR
4870 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4871 goto unsupported;
4873 fltsz = GET_MODE_SIZE (lhs_mode);
4874 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4876 rhs_mode = rhs_mode_iter.require ();
4877 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4878 break;
4880 cvt_type
4881 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4882 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4883 if (cvt_type == NULL_TREE)
4884 goto unsupported;
4886 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4888 if (!supportable_convert_operation (code, vectype_out,
4889 cvt_type, &decl1, &codecvt1))
4890 goto unsupported;
4892 else if (!supportable_widening_operation (code, stmt_info,
4893 vectype_out, cvt_type,
4894 &codecvt1, &codecvt2,
4895 &multi_step_cvt,
4896 &interm_types))
4897 continue;
4898 else
4899 gcc_assert (multi_step_cvt == 0);
4901 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4902 vectype_in, &code1, &code2,
4903 &multi_step_cvt, &interm_types))
4905 found_mode = true;
4906 break;
4910 if (!found_mode)
4911 goto unsupported;
4913 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4914 codecvt2 = ERROR_MARK;
4915 else
4917 multi_step_cvt++;
4918 interm_types.safe_push (cvt_type);
4919 cvt_type = NULL_TREE;
4921 break;
4923 case NARROW:
4924 gcc_assert (op_type == unary_op);
4925 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4926 &code1, &multi_step_cvt,
4927 &interm_types))
4928 break;
4930 if (code != FIX_TRUNC_EXPR
4931 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4932 goto unsupported;
4934 cvt_type
4935 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4936 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4937 if (cvt_type == NULL_TREE)
4938 goto unsupported;
4939 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4940 &decl1, &codecvt1))
4941 goto unsupported;
4942 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4943 &code1, &multi_step_cvt,
4944 &interm_types))
4945 break;
4946 goto unsupported;
4948 default:
4949 gcc_unreachable ();
4952 if (!vec_stmt) /* transformation not required. */
4954 DUMP_VECT_SCOPE ("vectorizable_conversion");
4955 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4957 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4958 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4959 cost_vec);
4961 else if (modifier == NARROW)
4963 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4964 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4965 cost_vec);
4967 else
4969 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4970 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4971 cost_vec);
4973 interm_types.release ();
4974 return true;
4977 /* Transform. */
4978 if (dump_enabled_p ())
4979 dump_printf_loc (MSG_NOTE, vect_location,
4980 "transform conversion. ncopies = %d.\n", ncopies);
4982 if (op_type == binary_op)
4984 if (CONSTANT_CLASS_P (op0))
4985 op0 = fold_convert (TREE_TYPE (op1), op0);
4986 else if (CONSTANT_CLASS_P (op1))
4987 op1 = fold_convert (TREE_TYPE (op0), op1);
4990 /* In case of multi-step conversion, we first generate conversion operations
4991 to the intermediate types, and then from that types to the final one.
4992 We create vector destinations for the intermediate type (TYPES) received
4993 from supportable_*_operation, and store them in the correct order
4994 for future use in vect_create_vectorized_*_stmts (). */
4995 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4996 vec_dest = vect_create_destination_var (scalar_dest,
4997 (cvt_type && modifier == WIDEN)
4998 ? cvt_type : vectype_out);
4999 vec_dsts.quick_push (vec_dest);
5001 if (multi_step_cvt)
5003 for (i = interm_types.length () - 1;
5004 interm_types.iterate (i, &intermediate_type); i--)
5006 vec_dest = vect_create_destination_var (scalar_dest,
5007 intermediate_type);
5008 vec_dsts.quick_push (vec_dest);
5012 if (cvt_type)
5013 vec_dest = vect_create_destination_var (scalar_dest,
5014 modifier == WIDEN
5015 ? vectype_out : cvt_type);
5017 if (!slp_node)
5019 if (modifier == WIDEN)
5021 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5022 if (op_type == binary_op)
5023 vec_oprnds1.create (1);
5025 else if (modifier == NARROW)
5026 vec_oprnds0.create (
5027 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5029 else if (code == WIDEN_LSHIFT_EXPR)
5030 vec_oprnds1.create (slp_node->vec_stmts_size);
5032 last_oprnd = op0;
5033 prev_stmt_info = NULL;
5034 switch (modifier)
5036 case NONE:
5037 for (j = 0; j < ncopies; j++)
5039 if (j == 0)
5040 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5041 NULL, slp_node);
5042 else
5043 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5045 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5047 stmt_vec_info new_stmt_info;
5048 /* Arguments are ready, create the new vector stmt. */
5049 if (code1 == CALL_EXPR)
5051 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5052 new_temp = make_ssa_name (vec_dest, new_stmt);
5053 gimple_call_set_lhs (new_stmt, new_temp);
5054 new_stmt_info
5055 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5057 else
5059 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5060 gassign *new_stmt
5061 = gimple_build_assign (vec_dest, code1, vop0);
5062 new_temp = make_ssa_name (vec_dest, new_stmt);
5063 gimple_assign_set_lhs (new_stmt, new_temp);
5064 new_stmt_info
5065 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5068 if (slp_node)
5069 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5070 else
5072 if (!prev_stmt_info)
5073 STMT_VINFO_VEC_STMT (stmt_info)
5074 = *vec_stmt = new_stmt_info;
5075 else
5076 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5077 prev_stmt_info = new_stmt_info;
5081 break;
5083 case WIDEN:
5084 /* In case the vectorization factor (VF) is bigger than the number
5085 of elements that we can fit in a vectype (nunits), we have to
5086 generate more than one vector stmt - i.e - we need to "unroll"
5087 the vector stmt by a factor VF/nunits. */
5088 for (j = 0; j < ncopies; j++)
5090 /* Handle uses. */
5091 if (j == 0)
5093 if (slp_node)
5095 if (code == WIDEN_LSHIFT_EXPR)
5097 unsigned int k;
5099 vec_oprnd1 = op1;
5100 /* Store vec_oprnd1 for every vector stmt to be created
5101 for SLP_NODE. We check during the analysis that all
5102 the shift arguments are the same. */
5103 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5104 vec_oprnds1.quick_push (vec_oprnd1);
5106 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5107 &vec_oprnds0, NULL, slp_node);
5109 else
5110 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5111 &vec_oprnds1, slp_node);
5113 else
5115 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5116 vec_oprnds0.quick_push (vec_oprnd0);
5117 if (op_type == binary_op)
5119 if (code == WIDEN_LSHIFT_EXPR)
5120 vec_oprnd1 = op1;
5121 else
5122 vec_oprnd1
5123 = vect_get_vec_def_for_operand (op1, stmt_info);
5124 vec_oprnds1.quick_push (vec_oprnd1);
5128 else
5130 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5131 vec_oprnds0.truncate (0);
5132 vec_oprnds0.quick_push (vec_oprnd0);
5133 if (op_type == binary_op)
5135 if (code == WIDEN_LSHIFT_EXPR)
5136 vec_oprnd1 = op1;
5137 else
5138 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5139 vec_oprnd1);
5140 vec_oprnds1.truncate (0);
5141 vec_oprnds1.quick_push (vec_oprnd1);
5145 /* Arguments are ready. Create the new vector stmts. */
5146 for (i = multi_step_cvt; i >= 0; i--)
5148 tree this_dest = vec_dsts[i];
5149 enum tree_code c1 = code1, c2 = code2;
5150 if (i == 0 && codecvt2 != ERROR_MARK)
5152 c1 = codecvt1;
5153 c2 = codecvt2;
5155 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5156 &vec_oprnds1, stmt_info,
5157 this_dest, gsi,
5158 c1, c2, decl1, decl2,
5159 op_type);
5162 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5164 stmt_vec_info new_stmt_info;
5165 if (cvt_type)
5167 if (codecvt1 == CALL_EXPR)
5169 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5170 new_temp = make_ssa_name (vec_dest, new_stmt);
5171 gimple_call_set_lhs (new_stmt, new_temp);
5172 new_stmt_info
5173 = vect_finish_stmt_generation (stmt_info, new_stmt,
5174 gsi);
5176 else
5178 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5179 new_temp = make_ssa_name (vec_dest);
5180 gassign *new_stmt
5181 = gimple_build_assign (new_temp, codecvt1, vop0);
5182 new_stmt_info
5183 = vect_finish_stmt_generation (stmt_info, new_stmt,
5184 gsi);
5187 else
5188 new_stmt_info = vinfo->lookup_def (vop0);
5190 if (slp_node)
5191 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5192 else
5194 if (!prev_stmt_info)
5195 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5196 else
5197 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5198 prev_stmt_info = new_stmt_info;
5203 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5204 break;
5206 case NARROW:
5207 /* In case the vectorization factor (VF) is bigger than the number
5208 of elements that we can fit in a vectype (nunits), we have to
5209 generate more than one vector stmt - i.e - we need to "unroll"
5210 the vector stmt by a factor VF/nunits. */
5211 for (j = 0; j < ncopies; j++)
5213 /* Handle uses. */
5214 if (slp_node)
5215 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5216 slp_node);
5217 else
5219 vec_oprnds0.truncate (0);
5220 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5221 vect_pow2 (multi_step_cvt) - 1);
5224 /* Arguments are ready. Create the new vector stmts. */
5225 if (cvt_type)
5226 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5228 if (codecvt1 == CALL_EXPR)
5230 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5231 new_temp = make_ssa_name (vec_dest, new_stmt);
5232 gimple_call_set_lhs (new_stmt, new_temp);
5233 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5235 else
5237 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5238 new_temp = make_ssa_name (vec_dest);
5239 gassign *new_stmt
5240 = gimple_build_assign (new_temp, codecvt1, vop0);
5241 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5244 vec_oprnds0[i] = new_temp;
5247 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5248 stmt_info, vec_dsts, gsi,
5249 slp_node, code1,
5250 &prev_stmt_info);
5253 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5254 break;
5257 vec_oprnds0.release ();
5258 vec_oprnds1.release ();
5259 interm_types.release ();
5261 return true;
5265 /* Function vectorizable_assignment.
5267 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5268 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5269 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5270 Return true if STMT_INFO is vectorizable in this way. */
5272 static bool
5273 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5274 stmt_vec_info *vec_stmt, slp_tree slp_node,
5275 stmt_vector_for_cost *cost_vec)
5277 tree vec_dest;
5278 tree scalar_dest;
5279 tree op;
5280 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5281 tree new_temp;
5282 enum vect_def_type dt[1] = {vect_unknown_def_type};
5283 int ndts = 1;
5284 int ncopies;
5285 int i, j;
5286 vec<tree> vec_oprnds = vNULL;
5287 tree vop;
5288 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5289 vec_info *vinfo = stmt_info->vinfo;
5290 stmt_vec_info prev_stmt_info = NULL;
5291 enum tree_code code;
5292 tree vectype_in;
5294 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5295 return false;
5297 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5298 && ! vec_stmt)
5299 return false;
5301 /* Is vectorizable assignment? */
5302 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5303 if (!stmt)
5304 return false;
5306 scalar_dest = gimple_assign_lhs (stmt);
5307 if (TREE_CODE (scalar_dest) != SSA_NAME)
5308 return false;
5310 code = gimple_assign_rhs_code (stmt);
5311 if (gimple_assign_single_p (stmt)
5312 || code == PAREN_EXPR
5313 || CONVERT_EXPR_CODE_P (code))
5314 op = gimple_assign_rhs1 (stmt);
5315 else
5316 return false;
5318 if (code == VIEW_CONVERT_EXPR)
5319 op = TREE_OPERAND (op, 0);
5321 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5322 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5324 /* Multiple types in SLP are handled by creating the appropriate number of
5325 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5326 case of SLP. */
5327 if (slp_node)
5328 ncopies = 1;
5329 else
5330 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5332 gcc_assert (ncopies >= 1);
5334 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5336 if (dump_enabled_p ())
5337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5338 "use not simple.\n");
5339 return false;
5342 /* We can handle NOP_EXPR conversions that do not change the number
5343 of elements or the vector size. */
5344 if ((CONVERT_EXPR_CODE_P (code)
5345 || code == VIEW_CONVERT_EXPR)
5346 && (!vectype_in
5347 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5348 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5349 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5350 return false;
5352 /* We do not handle bit-precision changes. */
5353 if ((CONVERT_EXPR_CODE_P (code)
5354 || code == VIEW_CONVERT_EXPR)
5355 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5356 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5357 || !type_has_mode_precision_p (TREE_TYPE (op)))
5358 /* But a conversion that does not change the bit-pattern is ok. */
5359 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5360 > TYPE_PRECISION (TREE_TYPE (op)))
5361 && TYPE_UNSIGNED (TREE_TYPE (op)))
5362 /* Conversion between boolean types of different sizes is
5363 a simple assignment in case their vectypes are same
5364 boolean vectors. */
5365 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5366 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5368 if (dump_enabled_p ())
5369 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5370 "type conversion to/from bit-precision "
5371 "unsupported.\n");
5372 return false;
5375 if (!vec_stmt) /* transformation not required. */
5377 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5378 DUMP_VECT_SCOPE ("vectorizable_assignment");
5379 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5380 return true;
5383 /* Transform. */
5384 if (dump_enabled_p ())
5385 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5387 /* Handle def. */
5388 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5390 /* Handle use. */
5391 for (j = 0; j < ncopies; j++)
5393 /* Handle uses. */
5394 if (j == 0)
5395 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5396 else
5397 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5399 /* Arguments are ready. create the new vector stmt. */
5400 stmt_vec_info new_stmt_info = NULL;
5401 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5403 if (CONVERT_EXPR_CODE_P (code)
5404 || code == VIEW_CONVERT_EXPR)
5405 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5406 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5407 new_temp = make_ssa_name (vec_dest, new_stmt);
5408 gimple_assign_set_lhs (new_stmt, new_temp);
5409 new_stmt_info
5410 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5411 if (slp_node)
5412 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5415 if (slp_node)
5416 continue;
5418 if (j == 0)
5419 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5420 else
5421 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5423 prev_stmt_info = new_stmt_info;
5426 vec_oprnds.release ();
5427 return true;
5431 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5432 either as shift by a scalar or by a vector. */
5434 bool
5435 vect_supportable_shift (enum tree_code code, tree scalar_type)
5438 machine_mode vec_mode;
5439 optab optab;
5440 int icode;
5441 tree vectype;
5443 vectype = get_vectype_for_scalar_type (scalar_type);
5444 if (!vectype)
5445 return false;
5447 optab = optab_for_tree_code (code, vectype, optab_scalar);
5448 if (!optab
5449 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5451 optab = optab_for_tree_code (code, vectype, optab_vector);
5452 if (!optab
5453 || (optab_handler (optab, TYPE_MODE (vectype))
5454 == CODE_FOR_nothing))
5455 return false;
5458 vec_mode = TYPE_MODE (vectype);
5459 icode = (int) optab_handler (optab, vec_mode);
5460 if (icode == CODE_FOR_nothing)
5461 return false;
5463 return true;
5467 /* Function vectorizable_shift.
5469 Check if STMT_INFO performs a shift operation that can be vectorized.
5470 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5471 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5472 Return true if STMT_INFO is vectorizable in this way. */
5474 bool
5475 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5476 stmt_vec_info *vec_stmt, slp_tree slp_node,
5477 stmt_vector_for_cost *cost_vec)
5479 tree vec_dest;
5480 tree scalar_dest;
5481 tree op0, op1 = NULL;
5482 tree vec_oprnd1 = NULL_TREE;
5483 tree vectype;
5484 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5485 enum tree_code code;
5486 machine_mode vec_mode;
5487 tree new_temp;
5488 optab optab;
5489 int icode;
5490 machine_mode optab_op2_mode;
5491 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5492 int ndts = 2;
5493 stmt_vec_info prev_stmt_info;
5494 poly_uint64 nunits_in;
5495 poly_uint64 nunits_out;
5496 tree vectype_out;
5497 tree op1_vectype;
5498 int ncopies;
5499 int j, i;
5500 vec<tree> vec_oprnds0 = vNULL;
5501 vec<tree> vec_oprnds1 = vNULL;
5502 tree vop0, vop1;
5503 unsigned int k;
5504 bool scalar_shift_arg = true;
5505 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5506 vec_info *vinfo = stmt_info->vinfo;
5508 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5509 return false;
5511 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5512 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5513 && ! vec_stmt)
5514 return false;
5516 /* Is STMT a vectorizable binary/unary operation? */
5517 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5518 if (!stmt)
5519 return false;
5521 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5522 return false;
5524 code = gimple_assign_rhs_code (stmt);
5526 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5527 || code == RROTATE_EXPR))
5528 return false;
5530 scalar_dest = gimple_assign_lhs (stmt);
5531 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5532 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5534 if (dump_enabled_p ())
5535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5536 "bit-precision shifts not supported.\n");
5537 return false;
5540 op0 = gimple_assign_rhs1 (stmt);
5541 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5543 if (dump_enabled_p ())
5544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5545 "use not simple.\n");
5546 return false;
5548 /* If op0 is an external or constant def use a vector type with
5549 the same size as the output vector type. */
5550 if (!vectype)
5551 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5552 if (vec_stmt)
5553 gcc_assert (vectype);
5554 if (!vectype)
5556 if (dump_enabled_p ())
5557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5558 "no vectype for scalar type\n");
5559 return false;
5562 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5563 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5564 if (maybe_ne (nunits_out, nunits_in))
5565 return false;
5567 op1 = gimple_assign_rhs2 (stmt);
5568 stmt_vec_info op1_def_stmt_info;
5569 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5570 &op1_def_stmt_info))
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5574 "use not simple.\n");
5575 return false;
5578 /* Multiple types in SLP are handled by creating the appropriate number of
5579 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5580 case of SLP. */
5581 if (slp_node)
5582 ncopies = 1;
5583 else
5584 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5586 gcc_assert (ncopies >= 1);
5588 /* Determine whether the shift amount is a vector, or scalar. If the
5589 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5591 if ((dt[1] == vect_internal_def
5592 || dt[1] == vect_induction_def
5593 || dt[1] == vect_nested_cycle)
5594 && !slp_node)
5595 scalar_shift_arg = false;
5596 else if (dt[1] == vect_constant_def
5597 || dt[1] == vect_external_def
5598 || dt[1] == vect_internal_def)
5600 /* In SLP, need to check whether the shift count is the same,
5601 in loops if it is a constant or invariant, it is always
5602 a scalar shift. */
5603 if (slp_node)
5605 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5606 stmt_vec_info slpstmt_info;
5608 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5610 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5611 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5612 scalar_shift_arg = false;
5615 /* For internal SLP defs we have to make sure we see scalar stmts
5616 for all vector elements.
5617 ??? For different vectors we could resort to a different
5618 scalar shift operand but code-generation below simply always
5619 takes the first. */
5620 if (dt[1] == vect_internal_def
5621 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5622 stmts.length ()))
5623 scalar_shift_arg = false;
5626 /* If the shift amount is computed by a pattern stmt we cannot
5627 use the scalar amount directly thus give up and use a vector
5628 shift. */
5629 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5630 scalar_shift_arg = false;
5632 else
5634 if (dump_enabled_p ())
5635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5636 "operand mode requires invariant argument.\n");
5637 return false;
5640 /* Vector shifted by vector. */
5641 if (!scalar_shift_arg)
5643 optab = optab_for_tree_code (code, vectype, optab_vector);
5644 if (dump_enabled_p ())
5645 dump_printf_loc (MSG_NOTE, vect_location,
5646 "vector/vector shift/rotate found.\n");
5648 if (!op1_vectype)
5649 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5650 if (op1_vectype == NULL_TREE
5651 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5653 if (dump_enabled_p ())
5654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5655 "unusable type for last operand in"
5656 " vector/vector shift/rotate.\n");
5657 return false;
5660 /* See if the machine has a vector shifted by scalar insn and if not
5661 then see if it has a vector shifted by vector insn. */
5662 else
5664 optab = optab_for_tree_code (code, vectype, optab_scalar);
5665 if (optab
5666 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5668 if (dump_enabled_p ())
5669 dump_printf_loc (MSG_NOTE, vect_location,
5670 "vector/scalar shift/rotate found.\n");
5672 else
5674 optab = optab_for_tree_code (code, vectype, optab_vector);
5675 if (optab
5676 && (optab_handler (optab, TYPE_MODE (vectype))
5677 != CODE_FOR_nothing))
5679 scalar_shift_arg = false;
5681 if (dump_enabled_p ())
5682 dump_printf_loc (MSG_NOTE, vect_location,
5683 "vector/vector shift/rotate found.\n");
5685 /* Unlike the other binary operators, shifts/rotates have
5686 the rhs being int, instead of the same type as the lhs,
5687 so make sure the scalar is the right type if we are
5688 dealing with vectors of long long/long/short/char. */
5689 if (dt[1] == vect_constant_def)
5690 op1 = fold_convert (TREE_TYPE (vectype), op1);
5691 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5692 TREE_TYPE (op1)))
5694 if (slp_node
5695 && TYPE_MODE (TREE_TYPE (vectype))
5696 != TYPE_MODE (TREE_TYPE (op1)))
5698 if (dump_enabled_p ())
5699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5700 "unusable type for last operand in"
5701 " vector/vector shift/rotate.\n");
5702 return false;
5704 if (vec_stmt && !slp_node)
5706 op1 = fold_convert (TREE_TYPE (vectype), op1);
5707 op1 = vect_init_vector (stmt_info, op1,
5708 TREE_TYPE (vectype), NULL);
5715 /* Supportable by target? */
5716 if (!optab)
5718 if (dump_enabled_p ())
5719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5720 "no optab.\n");
5721 return false;
5723 vec_mode = TYPE_MODE (vectype);
5724 icode = (int) optab_handler (optab, vec_mode);
5725 if (icode == CODE_FOR_nothing)
5727 if (dump_enabled_p ())
5728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5729 "op not supported by target.\n");
5730 /* Check only during analysis. */
5731 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5732 || (!vec_stmt
5733 && !vect_worthwhile_without_simd_p (vinfo, code)))
5734 return false;
5735 if (dump_enabled_p ())
5736 dump_printf_loc (MSG_NOTE, vect_location,
5737 "proceeding using word mode.\n");
5740 /* Worthwhile without SIMD support? Check only during analysis. */
5741 if (!vec_stmt
5742 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5743 && !vect_worthwhile_without_simd_p (vinfo, code))
5745 if (dump_enabled_p ())
5746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5747 "not worthwhile without SIMD support.\n");
5748 return false;
5751 if (!vec_stmt) /* transformation not required. */
5753 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5754 DUMP_VECT_SCOPE ("vectorizable_shift");
5755 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5756 return true;
5759 /* Transform. */
5761 if (dump_enabled_p ())
5762 dump_printf_loc (MSG_NOTE, vect_location,
5763 "transform binary/unary operation.\n");
5765 /* Handle def. */
5766 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5768 prev_stmt_info = NULL;
5769 for (j = 0; j < ncopies; j++)
5771 /* Handle uses. */
5772 if (j == 0)
5774 if (scalar_shift_arg)
5776 /* Vector shl and shr insn patterns can be defined with scalar
5777 operand 2 (shift operand). In this case, use constant or loop
5778 invariant op1 directly, without extending it to vector mode
5779 first. */
5780 optab_op2_mode = insn_data[icode].operand[2].mode;
5781 if (!VECTOR_MODE_P (optab_op2_mode))
5783 if (dump_enabled_p ())
5784 dump_printf_loc (MSG_NOTE, vect_location,
5785 "operand 1 using scalar mode.\n");
5786 vec_oprnd1 = op1;
5787 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5788 vec_oprnds1.quick_push (vec_oprnd1);
5789 if (slp_node)
5791 /* Store vec_oprnd1 for every vector stmt to be created
5792 for SLP_NODE. We check during the analysis that all
5793 the shift arguments are the same.
5794 TODO: Allow different constants for different vector
5795 stmts generated for an SLP instance. */
5796 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5797 vec_oprnds1.quick_push (vec_oprnd1);
5802 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5803 (a special case for certain kind of vector shifts); otherwise,
5804 operand 1 should be of a vector type (the usual case). */
5805 if (vec_oprnd1)
5806 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5807 slp_node);
5808 else
5809 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5810 slp_node);
5812 else
5813 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5815 /* Arguments are ready. Create the new vector stmt. */
5816 stmt_vec_info new_stmt_info = NULL;
5817 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5819 vop1 = vec_oprnds1[i];
5820 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5821 new_temp = make_ssa_name (vec_dest, new_stmt);
5822 gimple_assign_set_lhs (new_stmt, new_temp);
5823 new_stmt_info
5824 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5825 if (slp_node)
5826 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5829 if (slp_node)
5830 continue;
5832 if (j == 0)
5833 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5834 else
5835 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5836 prev_stmt_info = new_stmt_info;
5839 vec_oprnds0.release ();
5840 vec_oprnds1.release ();
5842 return true;
5846 /* Function vectorizable_operation.
5848 Check if STMT_INFO performs a binary, unary or ternary operation that can
5849 be vectorized.
5850 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5851 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5852 Return true if STMT_INFO is vectorizable in this way. */
5854 static bool
5855 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5856 stmt_vec_info *vec_stmt, slp_tree slp_node,
5857 stmt_vector_for_cost *cost_vec)
5859 tree vec_dest;
5860 tree scalar_dest;
5861 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5862 tree vectype;
5863 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5864 enum tree_code code, orig_code;
5865 machine_mode vec_mode;
5866 tree new_temp;
5867 int op_type;
5868 optab optab;
5869 bool target_support_p;
5870 enum vect_def_type dt[3]
5871 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5872 int ndts = 3;
5873 stmt_vec_info prev_stmt_info;
5874 poly_uint64 nunits_in;
5875 poly_uint64 nunits_out;
5876 tree vectype_out;
5877 int ncopies;
5878 int j, i;
5879 vec<tree> vec_oprnds0 = vNULL;
5880 vec<tree> vec_oprnds1 = vNULL;
5881 vec<tree> vec_oprnds2 = vNULL;
5882 tree vop0, vop1, vop2;
5883 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5884 vec_info *vinfo = stmt_info->vinfo;
5886 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5887 return false;
5889 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5890 && ! vec_stmt)
5891 return false;
5893 /* Is STMT a vectorizable binary/unary operation? */
5894 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5895 if (!stmt)
5896 return false;
5898 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5899 return false;
5901 orig_code = code = gimple_assign_rhs_code (stmt);
5903 /* For pointer addition and subtraction, we should use the normal
5904 plus and minus for the vector operation. */
5905 if (code == POINTER_PLUS_EXPR)
5906 code = PLUS_EXPR;
5907 if (code == POINTER_DIFF_EXPR)
5908 code = MINUS_EXPR;
5910 /* Support only unary or binary operations. */
5911 op_type = TREE_CODE_LENGTH (code);
5912 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5914 if (dump_enabled_p ())
5915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5916 "num. args = %d (not unary/binary/ternary op).\n",
5917 op_type);
5918 return false;
5921 scalar_dest = gimple_assign_lhs (stmt);
5922 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5924 /* Most operations cannot handle bit-precision types without extra
5925 truncations. */
5926 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5927 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5928 /* Exception are bitwise binary operations. */
5929 && code != BIT_IOR_EXPR
5930 && code != BIT_XOR_EXPR
5931 && code != BIT_AND_EXPR)
5933 if (dump_enabled_p ())
5934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5935 "bit-precision arithmetic not supported.\n");
5936 return false;
5939 op0 = gimple_assign_rhs1 (stmt);
5940 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5942 if (dump_enabled_p ())
5943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5944 "use not simple.\n");
5945 return false;
5947 /* If op0 is an external or constant def use a vector type with
5948 the same size as the output vector type. */
5949 if (!vectype)
5951 /* For boolean type we cannot determine vectype by
5952 invariant value (don't know whether it is a vector
5953 of booleans or vector of integers). We use output
5954 vectype because operations on boolean don't change
5955 type. */
5956 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5958 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5960 if (dump_enabled_p ())
5961 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5962 "not supported operation on bool value.\n");
5963 return false;
5965 vectype = vectype_out;
5967 else
5968 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5970 if (vec_stmt)
5971 gcc_assert (vectype);
5972 if (!vectype)
5974 if (dump_enabled_p ())
5975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5976 "no vectype for scalar type %T\n",
5977 TREE_TYPE (op0));
5979 return false;
5982 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5983 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5984 if (maybe_ne (nunits_out, nunits_in))
5985 return false;
5987 if (op_type == binary_op || op_type == ternary_op)
5989 op1 = gimple_assign_rhs2 (stmt);
5990 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
5992 if (dump_enabled_p ())
5993 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5994 "use not simple.\n");
5995 return false;
5998 if (op_type == ternary_op)
6000 op2 = gimple_assign_rhs3 (stmt);
6001 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
6003 if (dump_enabled_p ())
6004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6005 "use not simple.\n");
6006 return false;
6010 /* Multiple types in SLP are handled by creating the appropriate number of
6011 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6012 case of SLP. */
6013 if (slp_node)
6014 ncopies = 1;
6015 else
6016 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6018 gcc_assert (ncopies >= 1);
6020 /* Shifts are handled in vectorizable_shift (). */
6021 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6022 || code == RROTATE_EXPR)
6023 return false;
6025 /* Supportable by target? */
6027 vec_mode = TYPE_MODE (vectype);
6028 if (code == MULT_HIGHPART_EXPR)
6029 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6030 else
6032 optab = optab_for_tree_code (code, vectype, optab_default);
6033 if (!optab)
6035 if (dump_enabled_p ())
6036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6037 "no optab.\n");
6038 return false;
6040 target_support_p = (optab_handler (optab, vec_mode)
6041 != CODE_FOR_nothing);
6044 if (!target_support_p)
6046 if (dump_enabled_p ())
6047 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6048 "op not supported by target.\n");
6049 /* Check only during analysis. */
6050 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6051 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6052 return false;
6053 if (dump_enabled_p ())
6054 dump_printf_loc (MSG_NOTE, vect_location,
6055 "proceeding using word mode.\n");
6058 /* Worthwhile without SIMD support? Check only during analysis. */
6059 if (!VECTOR_MODE_P (vec_mode)
6060 && !vec_stmt
6061 && !vect_worthwhile_without_simd_p (vinfo, code))
6063 if (dump_enabled_p ())
6064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6065 "not worthwhile without SIMD support.\n");
6066 return false;
6069 if (!vec_stmt) /* transformation not required. */
6071 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6072 DUMP_VECT_SCOPE ("vectorizable_operation");
6073 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6074 return true;
6077 /* Transform. */
6079 if (dump_enabled_p ())
6080 dump_printf_loc (MSG_NOTE, vect_location,
6081 "transform binary/unary operation.\n");
6083 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6084 vectors with unsigned elements, but the result is signed. So, we
6085 need to compute the MINUS_EXPR into vectype temporary and
6086 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6087 tree vec_cvt_dest = NULL_TREE;
6088 if (orig_code == POINTER_DIFF_EXPR)
6090 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6091 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6093 /* Handle def. */
6094 else
6095 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6097 /* In case the vectorization factor (VF) is bigger than the number
6098 of elements that we can fit in a vectype (nunits), we have to generate
6099 more than one vector stmt - i.e - we need to "unroll" the
6100 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6101 from one copy of the vector stmt to the next, in the field
6102 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6103 stages to find the correct vector defs to be used when vectorizing
6104 stmts that use the defs of the current stmt. The example below
6105 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6106 we need to create 4 vectorized stmts):
6108 before vectorization:
6109 RELATED_STMT VEC_STMT
6110 S1: x = memref - -
6111 S2: z = x + 1 - -
6113 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6114 there):
6115 RELATED_STMT VEC_STMT
6116 VS1_0: vx0 = memref0 VS1_1 -
6117 VS1_1: vx1 = memref1 VS1_2 -
6118 VS1_2: vx2 = memref2 VS1_3 -
6119 VS1_3: vx3 = memref3 - -
6120 S1: x = load - VS1_0
6121 S2: z = x + 1 - -
6123 step2: vectorize stmt S2 (done here):
6124 To vectorize stmt S2 we first need to find the relevant vector
6125 def for the first operand 'x'. This is, as usual, obtained from
6126 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6127 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6128 relevant vector def 'vx0'. Having found 'vx0' we can generate
6129 the vector stmt VS2_0, and as usual, record it in the
6130 STMT_VINFO_VEC_STMT of stmt S2.
6131 When creating the second copy (VS2_1), we obtain the relevant vector
6132 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6133 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6134 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6135 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6136 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6137 chain of stmts and pointers:
6138 RELATED_STMT VEC_STMT
6139 VS1_0: vx0 = memref0 VS1_1 -
6140 VS1_1: vx1 = memref1 VS1_2 -
6141 VS1_2: vx2 = memref2 VS1_3 -
6142 VS1_3: vx3 = memref3 - -
6143 S1: x = load - VS1_0
6144 VS2_0: vz0 = vx0 + v1 VS2_1 -
6145 VS2_1: vz1 = vx1 + v1 VS2_2 -
6146 VS2_2: vz2 = vx2 + v1 VS2_3 -
6147 VS2_3: vz3 = vx3 + v1 - -
6148 S2: z = x + 1 - VS2_0 */
6150 prev_stmt_info = NULL;
6151 for (j = 0; j < ncopies; j++)
6153 /* Handle uses. */
6154 if (j == 0)
6156 if (op_type == binary_op)
6157 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6158 slp_node);
6159 else if (op_type == ternary_op)
6161 if (slp_node)
6163 auto_vec<tree> ops(3);
6164 ops.quick_push (op0);
6165 ops.quick_push (op1);
6166 ops.quick_push (op2);
6167 auto_vec<vec<tree> > vec_defs(3);
6168 vect_get_slp_defs (ops, slp_node, &vec_defs);
6169 vec_oprnds0 = vec_defs[0];
6170 vec_oprnds1 = vec_defs[1];
6171 vec_oprnds2 = vec_defs[2];
6173 else
6175 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6176 &vec_oprnds1, NULL);
6177 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6178 NULL, NULL);
6181 else
6182 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6183 slp_node);
6185 else
6187 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6188 if (op_type == ternary_op)
6190 tree vec_oprnd = vec_oprnds2.pop ();
6191 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6192 vec_oprnd));
6196 /* Arguments are ready. Create the new vector stmt. */
6197 stmt_vec_info new_stmt_info = NULL;
6198 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6200 vop1 = ((op_type == binary_op || op_type == ternary_op)
6201 ? vec_oprnds1[i] : NULL_TREE);
6202 vop2 = ((op_type == ternary_op)
6203 ? vec_oprnds2[i] : NULL_TREE);
6204 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6205 vop0, vop1, vop2);
6206 new_temp = make_ssa_name (vec_dest, new_stmt);
6207 gimple_assign_set_lhs (new_stmt, new_temp);
6208 new_stmt_info
6209 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6210 if (vec_cvt_dest)
6212 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6213 gassign *new_stmt
6214 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6215 new_temp);
6216 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6217 gimple_assign_set_lhs (new_stmt, new_temp);
6218 new_stmt_info
6219 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6221 if (slp_node)
6222 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6225 if (slp_node)
6226 continue;
6228 if (j == 0)
6229 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6230 else
6231 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6232 prev_stmt_info = new_stmt_info;
6235 vec_oprnds0.release ();
6236 vec_oprnds1.release ();
6237 vec_oprnds2.release ();
6239 return true;
6242 /* A helper function to ensure data reference DR_INFO's base alignment. */
6244 static void
6245 ensure_base_align (dr_vec_info *dr_info)
6247 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6248 return;
6250 if (dr_info->base_misaligned)
6252 tree base_decl = dr_info->base_decl;
6254 // We should only be able to increase the alignment of a base object if
6255 // we know what its new alignment should be at compile time.
6256 unsigned HOST_WIDE_INT align_base_to =
6257 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6259 if (decl_in_symtab_p (base_decl))
6260 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6261 else
6263 SET_DECL_ALIGN (base_decl, align_base_to);
6264 DECL_USER_ALIGN (base_decl) = 1;
6266 dr_info->base_misaligned = false;
6271 /* Function get_group_alias_ptr_type.
6273 Return the alias type for the group starting at FIRST_STMT_INFO. */
6275 static tree
6276 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6278 struct data_reference *first_dr, *next_dr;
6280 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6281 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6282 while (next_stmt_info)
6284 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6285 if (get_alias_set (DR_REF (first_dr))
6286 != get_alias_set (DR_REF (next_dr)))
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_NOTE, vect_location,
6290 "conflicting alias set types.\n");
6291 return ptr_type_node;
6293 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6295 return reference_alias_ptr_type (DR_REF (first_dr));
6299 /* Function vectorizable_store.
6301 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
6302 that can be vectorized.
6303 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6304 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6305 Return true if STMT_INFO is vectorizable in this way. */
6307 static bool
6308 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6309 stmt_vec_info *vec_stmt, slp_tree slp_node,
6310 stmt_vector_for_cost *cost_vec)
6312 tree data_ref;
6313 tree op;
6314 tree vec_oprnd = NULL_TREE;
6315 tree elem_type;
6316 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6317 struct loop *loop = NULL;
6318 machine_mode vec_mode;
6319 tree dummy;
6320 enum dr_alignment_support alignment_support_scheme;
6321 enum vect_def_type rhs_dt = vect_unknown_def_type;
6322 enum vect_def_type mask_dt = vect_unknown_def_type;
6323 stmt_vec_info prev_stmt_info = NULL;
6324 tree dataref_ptr = NULL_TREE;
6325 tree dataref_offset = NULL_TREE;
6326 gimple *ptr_incr = NULL;
6327 int ncopies;
6328 int j;
6329 stmt_vec_info first_stmt_info;
6330 bool grouped_store;
6331 unsigned int group_size, i;
6332 vec<tree> oprnds = vNULL;
6333 vec<tree> result_chain = vNULL;
6334 tree offset = NULL_TREE;
6335 vec<tree> vec_oprnds = vNULL;
6336 bool slp = (slp_node != NULL);
6337 unsigned int vec_num;
6338 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6339 vec_info *vinfo = stmt_info->vinfo;
6340 tree aggr_type;
6341 gather_scatter_info gs_info;
6342 poly_uint64 vf;
6343 vec_load_store_type vls_type;
6344 tree ref_type;
6346 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6347 return false;
6349 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6350 && ! vec_stmt)
6351 return false;
6353 /* Is vectorizable store? */
6355 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6356 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6358 tree scalar_dest = gimple_assign_lhs (assign);
6359 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6360 && is_pattern_stmt_p (stmt_info))
6361 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6362 if (TREE_CODE (scalar_dest) != ARRAY_REF
6363 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6364 && TREE_CODE (scalar_dest) != INDIRECT_REF
6365 && TREE_CODE (scalar_dest) != COMPONENT_REF
6366 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6367 && TREE_CODE (scalar_dest) != REALPART_EXPR
6368 && TREE_CODE (scalar_dest) != MEM_REF)
6369 return false;
6371 else
6373 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
6374 if (!call || !gimple_call_internal_p (call))
6375 return false;
6377 internal_fn ifn = gimple_call_internal_fn (call);
6378 if (!internal_store_fn_p (ifn))
6379 return false;
6381 if (slp_node != NULL)
6383 if (dump_enabled_p ())
6384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6385 "SLP of masked stores not supported.\n");
6386 return false;
6389 int mask_index = internal_fn_mask_index (ifn);
6390 if (mask_index >= 0)
6392 mask = gimple_call_arg (call, mask_index);
6393 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
6394 &mask_vectype))
6395 return false;
6399 op = vect_get_store_rhs (stmt_info);
6401 /* Cannot have hybrid store SLP -- that would mean storing to the
6402 same location twice. */
6403 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6405 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6406 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6408 if (loop_vinfo)
6410 loop = LOOP_VINFO_LOOP (loop_vinfo);
6411 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6413 else
6414 vf = 1;
6416 /* Multiple types in SLP are handled by creating the appropriate number of
6417 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6418 case of SLP. */
6419 if (slp)
6420 ncopies = 1;
6421 else
6422 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6424 gcc_assert (ncopies >= 1);
6426 /* FORNOW. This restriction should be relaxed. */
6427 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
6429 if (dump_enabled_p ())
6430 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6431 "multiple types in nested loop.\n");
6432 return false;
6435 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
6436 return false;
6438 elem_type = TREE_TYPE (vectype);
6439 vec_mode = TYPE_MODE (vectype);
6441 if (!STMT_VINFO_DATA_REF (stmt_info))
6442 return false;
6444 vect_memory_access_type memory_access_type;
6445 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
6446 &memory_access_type, &gs_info))
6447 return false;
6449 if (mask)
6451 if (memory_access_type == VMAT_CONTIGUOUS)
6453 if (!VECTOR_MODE_P (vec_mode)
6454 || !can_vec_mask_load_store_p (vec_mode,
6455 TYPE_MODE (mask_vectype), false))
6456 return false;
6458 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6459 && (memory_access_type != VMAT_GATHER_SCATTER
6460 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
6462 if (dump_enabled_p ())
6463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6464 "unsupported access type for masked store.\n");
6465 return false;
6468 else
6470 /* FORNOW. In some cases can vectorize even if data-type not supported
6471 (e.g. - array initialization with 0). */
6472 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6473 return false;
6476 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
6477 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6478 && memory_access_type != VMAT_GATHER_SCATTER
6479 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6480 if (grouped_store)
6482 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6483 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6484 group_size = DR_GROUP_SIZE (first_stmt_info);
6486 else
6488 first_stmt_info = stmt_info;
6489 first_dr_info = dr_info;
6490 group_size = vec_num = 1;
6493 if (!vec_stmt) /* transformation not required. */
6495 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6497 if (loop_vinfo
6498 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6499 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6500 memory_access_type, &gs_info);
6502 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6503 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6504 vls_type, slp_node, cost_vec);
6505 return true;
6507 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6509 /* Transform. */
6511 ensure_base_align (dr_info);
6513 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6515 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6516 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6517 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6518 tree ptr, var, scale, vec_mask;
6519 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
6520 tree mask_halfvectype = mask_vectype;
6521 edge pe = loop_preheader_edge (loop);
6522 gimple_seq seq;
6523 basic_block new_bb;
6524 enum { NARROW, NONE, WIDEN } modifier;
6525 poly_uint64 scatter_off_nunits
6526 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6528 if (known_eq (nunits, scatter_off_nunits))
6529 modifier = NONE;
6530 else if (known_eq (nunits * 2, scatter_off_nunits))
6532 modifier = WIDEN;
6534 /* Currently gathers and scatters are only supported for
6535 fixed-length vectors. */
6536 unsigned int count = scatter_off_nunits.to_constant ();
6537 vec_perm_builder sel (count, count, 1);
6538 for (i = 0; i < (unsigned int) count; ++i)
6539 sel.quick_push (i | (count / 2));
6541 vec_perm_indices indices (sel, 1, count);
6542 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6543 indices);
6544 gcc_assert (perm_mask != NULL_TREE);
6546 else if (known_eq (nunits, scatter_off_nunits * 2))
6548 modifier = NARROW;
6550 /* Currently gathers and scatters are only supported for
6551 fixed-length vectors. */
6552 unsigned int count = nunits.to_constant ();
6553 vec_perm_builder sel (count, count, 1);
6554 for (i = 0; i < (unsigned int) count; ++i)
6555 sel.quick_push (i | (count / 2));
6557 vec_perm_indices indices (sel, 2, count);
6558 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6559 gcc_assert (perm_mask != NULL_TREE);
6560 ncopies *= 2;
6562 if (mask)
6563 mask_halfvectype
6564 = build_same_sized_truth_vector_type (gs_info.offset_vectype);
6566 else
6567 gcc_unreachable ();
6569 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6570 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6571 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6572 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6573 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6574 scaletype = TREE_VALUE (arglist);
6576 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6577 && TREE_CODE (rettype) == VOID_TYPE);
6579 ptr = fold_convert (ptrtype, gs_info.base);
6580 if (!is_gimple_min_invariant (ptr))
6582 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6583 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6584 gcc_assert (!new_bb);
6587 if (mask == NULL_TREE)
6589 mask_arg = build_int_cst (masktype, -1);
6590 mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
6593 scale = build_int_cst (scaletype, gs_info.scale);
6595 prev_stmt_info = NULL;
6596 for (j = 0; j < ncopies; ++j)
6598 if (j == 0)
6600 src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
6601 op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
6602 stmt_info);
6603 if (mask)
6604 mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
6605 stmt_info);
6607 else if (modifier != NONE && (j & 1))
6609 if (modifier == WIDEN)
6612 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
6613 vec_oprnd1);
6614 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6615 stmt_info, gsi);
6616 if (mask)
6617 mask_op
6618 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
6619 vec_mask);
6621 else if (modifier == NARROW)
6623 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6624 stmt_info, gsi);
6625 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
6626 vec_oprnd0);
6628 else
6629 gcc_unreachable ();
6631 else
6633 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
6634 vec_oprnd1);
6635 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
6636 vec_oprnd0);
6637 if (mask)
6638 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
6639 vec_mask);
6642 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6644 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6645 TYPE_VECTOR_SUBPARTS (srctype)));
6646 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6647 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6648 gassign *new_stmt
6649 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6650 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6651 src = var;
6654 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6656 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6657 TYPE_VECTOR_SUBPARTS (idxtype)));
6658 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6659 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6660 gassign *new_stmt
6661 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6662 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6663 op = var;
6666 if (mask)
6668 tree utype;
6669 mask_arg = mask_op;
6670 if (modifier == NARROW)
6672 var = vect_get_new_ssa_name (mask_halfvectype,
6673 vect_simple_var);
6674 gassign *new_stmt
6675 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
6676 : VEC_UNPACK_LO_EXPR,
6677 mask_op);
6678 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6679 mask_arg = var;
6681 tree optype = TREE_TYPE (mask_arg);
6682 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
6683 utype = masktype;
6684 else
6685 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
6686 var = vect_get_new_ssa_name (utype, vect_scalar_var);
6687 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
6688 gassign *new_stmt
6689 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
6690 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6691 mask_arg = var;
6692 if (!useless_type_conversion_p (masktype, utype))
6694 gcc_assert (TYPE_PRECISION (utype)
6695 <= TYPE_PRECISION (masktype));
6696 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
6697 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
6698 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6699 mask_arg = var;
6703 gcall *new_stmt
6704 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
6705 stmt_vec_info new_stmt_info
6706 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6708 if (prev_stmt_info == NULL)
6709 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6710 else
6711 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6712 prev_stmt_info = new_stmt_info;
6714 return true;
6717 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6718 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
6720 if (grouped_store)
6722 /* FORNOW */
6723 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
6725 /* We vectorize all the stmts of the interleaving group when we
6726 reach the last stmt in the group. */
6727 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6728 < DR_GROUP_SIZE (first_stmt_info)
6729 && !slp)
6731 *vec_stmt = NULL;
6732 return true;
6735 if (slp)
6737 grouped_store = false;
6738 /* VEC_NUM is the number of vect stmts to be created for this
6739 group. */
6740 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6741 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6742 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6743 == first_stmt_info);
6744 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6745 op = vect_get_store_rhs (first_stmt_info);
6747 else
6748 /* VEC_NUM is the number of vect stmts to be created for this
6749 group. */
6750 vec_num = group_size;
6752 ref_type = get_group_alias_ptr_type (first_stmt_info);
6754 else
6755 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
6757 if (dump_enabled_p ())
6758 dump_printf_loc (MSG_NOTE, vect_location,
6759 "transform store. ncopies = %d\n", ncopies);
6761 if (memory_access_type == VMAT_ELEMENTWISE
6762 || memory_access_type == VMAT_STRIDED_SLP)
6764 gimple_stmt_iterator incr_gsi;
6765 bool insert_after;
6766 gimple *incr;
6767 tree offvar;
6768 tree ivstep;
6769 tree running_off;
6770 tree stride_base, stride_step, alias_off;
6771 tree vec_oprnd;
6772 unsigned int g;
6773 /* Checked by get_load_store_type. */
6774 unsigned int const_nunits = nunits.to_constant ();
6776 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6777 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
6779 stride_base
6780 = fold_build_pointer_plus
6781 (DR_BASE_ADDRESS (first_dr_info->dr),
6782 size_binop (PLUS_EXPR,
6783 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
6784 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
6785 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
6787 /* For a store with loop-invariant (but other than power-of-2)
6788 stride (i.e. not a grouped access) like so:
6790 for (i = 0; i < n; i += stride)
6791 array[i] = ...;
6793 we generate a new induction variable and new stores from
6794 the components of the (vectorized) rhs:
6796 for (j = 0; ; j += VF*stride)
6797 vectemp = ...;
6798 tmp1 = vectemp[0];
6799 array[j] = tmp1;
6800 tmp2 = vectemp[1];
6801 array[j + stride] = tmp2;
6805 unsigned nstores = const_nunits;
6806 unsigned lnel = 1;
6807 tree ltype = elem_type;
6808 tree lvectype = vectype;
6809 if (slp)
6811 if (group_size < const_nunits
6812 && const_nunits % group_size == 0)
6814 nstores = const_nunits / group_size;
6815 lnel = group_size;
6816 ltype = build_vector_type (elem_type, group_size);
6817 lvectype = vectype;
6819 /* First check if vec_extract optab doesn't support extraction
6820 of vector elts directly. */
6821 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6822 machine_mode vmode;
6823 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6824 || !VECTOR_MODE_P (vmode)
6825 || !targetm.vector_mode_supported_p (vmode)
6826 || (convert_optab_handler (vec_extract_optab,
6827 TYPE_MODE (vectype), vmode)
6828 == CODE_FOR_nothing))
6830 /* Try to avoid emitting an extract of vector elements
6831 by performing the extracts using an integer type of the
6832 same size, extracting from a vector of those and then
6833 re-interpreting it as the original vector type if
6834 supported. */
6835 unsigned lsize
6836 = group_size * GET_MODE_BITSIZE (elmode);
6837 unsigned int lnunits = const_nunits / group_size;
6838 /* If we can't construct such a vector fall back to
6839 element extracts from the original vector type and
6840 element size stores. */
6841 if (int_mode_for_size (lsize, 0).exists (&elmode)
6842 && mode_for_vector (elmode, lnunits).exists (&vmode)
6843 && VECTOR_MODE_P (vmode)
6844 && targetm.vector_mode_supported_p (vmode)
6845 && (convert_optab_handler (vec_extract_optab,
6846 vmode, elmode)
6847 != CODE_FOR_nothing))
6849 nstores = lnunits;
6850 lnel = group_size;
6851 ltype = build_nonstandard_integer_type (lsize, 1);
6852 lvectype = build_vector_type (ltype, nstores);
6854 /* Else fall back to vector extraction anyway.
6855 Fewer stores are more important than avoiding spilling
6856 of the vector we extract from. Compared to the
6857 construction case in vectorizable_load no store-forwarding
6858 issue exists here for reasonable archs. */
6861 else if (group_size >= const_nunits
6862 && group_size % const_nunits == 0)
6864 nstores = 1;
6865 lnel = const_nunits;
6866 ltype = vectype;
6867 lvectype = vectype;
6869 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6870 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6873 ivstep = stride_step;
6874 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6875 build_int_cst (TREE_TYPE (ivstep), vf));
6877 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6879 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6880 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6881 create_iv (stride_base, ivstep, NULL,
6882 loop, &incr_gsi, insert_after,
6883 &offvar, NULL);
6884 incr = gsi_stmt (incr_gsi);
6885 loop_vinfo->add_stmt (incr);
6887 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6889 prev_stmt_info = NULL;
6890 alias_off = build_int_cst (ref_type, 0);
6891 stmt_vec_info next_stmt_info = first_stmt_info;
6892 for (g = 0; g < group_size; g++)
6894 running_off = offvar;
6895 if (g)
6897 tree size = TYPE_SIZE_UNIT (ltype);
6898 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6899 size);
6900 tree newoff = copy_ssa_name (running_off, NULL);
6901 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6902 running_off, pos);
6903 vect_finish_stmt_generation (stmt_info, incr, gsi);
6904 running_off = newoff;
6906 unsigned int group_el = 0;
6907 unsigned HOST_WIDE_INT
6908 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6909 for (j = 0; j < ncopies; j++)
6911 /* We've set op and dt above, from vect_get_store_rhs,
6912 and first_stmt_info == stmt_info. */
6913 if (j == 0)
6915 if (slp)
6917 vect_get_vec_defs (op, NULL_TREE, stmt_info,
6918 &vec_oprnds, NULL, slp_node);
6919 vec_oprnd = vec_oprnds[0];
6921 else
6923 op = vect_get_store_rhs (next_stmt_info);
6924 vec_oprnd = vect_get_vec_def_for_operand
6925 (op, next_stmt_info);
6928 else
6930 if (slp)
6931 vec_oprnd = vec_oprnds[j];
6932 else
6933 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
6934 vec_oprnd);
6936 /* Pun the vector to extract from if necessary. */
6937 if (lvectype != vectype)
6939 tree tem = make_ssa_name (lvectype);
6940 gimple *pun
6941 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6942 lvectype, vec_oprnd));
6943 vect_finish_stmt_generation (stmt_info, pun, gsi);
6944 vec_oprnd = tem;
6946 for (i = 0; i < nstores; i++)
6948 tree newref, newoff;
6949 gimple *incr, *assign;
6950 tree size = TYPE_SIZE (ltype);
6951 /* Extract the i'th component. */
6952 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6953 bitsize_int (i), size);
6954 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6955 size, pos);
6957 elem = force_gimple_operand_gsi (gsi, elem, true,
6958 NULL_TREE, true,
6959 GSI_SAME_STMT);
6961 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6962 group_el * elsz);
6963 newref = build2 (MEM_REF, ltype,
6964 running_off, this_off);
6965 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
6967 /* And store it to *running_off. */
6968 assign = gimple_build_assign (newref, elem);
6969 stmt_vec_info assign_info
6970 = vect_finish_stmt_generation (stmt_info, assign, gsi);
6972 group_el += lnel;
6973 if (! slp
6974 || group_el == group_size)
6976 newoff = copy_ssa_name (running_off, NULL);
6977 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6978 running_off, stride_step);
6979 vect_finish_stmt_generation (stmt_info, incr, gsi);
6981 running_off = newoff;
6982 group_el = 0;
6984 if (g == group_size - 1
6985 && !slp)
6987 if (j == 0 && i == 0)
6988 STMT_VINFO_VEC_STMT (stmt_info)
6989 = *vec_stmt = assign_info;
6990 else
6991 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6992 prev_stmt_info = assign_info;
6996 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6997 if (slp)
6998 break;
7001 vec_oprnds.release ();
7002 return true;
7005 auto_vec<tree> dr_chain (group_size);
7006 oprnds.create (group_size);
7008 alignment_support_scheme
7009 = vect_supportable_dr_alignment (first_dr_info, false);
7010 gcc_assert (alignment_support_scheme);
7011 vec_loop_masks *loop_masks
7012 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7013 ? &LOOP_VINFO_MASKS (loop_vinfo)
7014 : NULL);
7015 /* Targets with store-lane instructions must not require explicit
7016 realignment. vect_supportable_dr_alignment always returns either
7017 dr_aligned or dr_unaligned_supported for masked operations. */
7018 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7019 && !mask
7020 && !loop_masks)
7021 || alignment_support_scheme == dr_aligned
7022 || alignment_support_scheme == dr_unaligned_supported);
7024 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7025 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7026 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7028 tree bump;
7029 tree vec_offset = NULL_TREE;
7030 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7032 aggr_type = NULL_TREE;
7033 bump = NULL_TREE;
7035 else if (memory_access_type == VMAT_GATHER_SCATTER)
7037 aggr_type = elem_type;
7038 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7039 &bump, &vec_offset);
7041 else
7043 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7044 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7045 else
7046 aggr_type = vectype;
7047 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
7048 memory_access_type);
7051 if (mask)
7052 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7054 /* In case the vectorization factor (VF) is bigger than the number
7055 of elements that we can fit in a vectype (nunits), we have to generate
7056 more than one vector stmt - i.e - we need to "unroll" the
7057 vector stmt by a factor VF/nunits. For more details see documentation in
7058 vect_get_vec_def_for_copy_stmt. */
7060 /* In case of interleaving (non-unit grouped access):
7062 S1: &base + 2 = x2
7063 S2: &base = x0
7064 S3: &base + 1 = x1
7065 S4: &base + 3 = x3
7067 We create vectorized stores starting from base address (the access of the
7068 first stmt in the chain (S2 in the above example), when the last store stmt
7069 of the chain (S4) is reached:
7071 VS1: &base = vx2
7072 VS2: &base + vec_size*1 = vx0
7073 VS3: &base + vec_size*2 = vx1
7074 VS4: &base + vec_size*3 = vx3
7076 Then permutation statements are generated:
7078 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7079 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7082 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7083 (the order of the data-refs in the output of vect_permute_store_chain
7084 corresponds to the order of scalar stmts in the interleaving chain - see
7085 the documentation of vect_permute_store_chain()).
7087 In case of both multiple types and interleaving, above vector stores and
7088 permutation stmts are created for every copy. The result vector stmts are
7089 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7090 STMT_VINFO_RELATED_STMT for the next copies.
7093 prev_stmt_info = NULL;
7094 tree vec_mask = NULL_TREE;
7095 for (j = 0; j < ncopies; j++)
7097 stmt_vec_info new_stmt_info;
7098 if (j == 0)
7100 if (slp)
7102 /* Get vectorized arguments for SLP_NODE. */
7103 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
7104 NULL, slp_node);
7106 vec_oprnd = vec_oprnds[0];
7108 else
7110 /* For interleaved stores we collect vectorized defs for all the
7111 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7112 used as an input to vect_permute_store_chain(), and OPRNDS as
7113 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
7115 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7116 OPRNDS are of size 1. */
7117 stmt_vec_info next_stmt_info = first_stmt_info;
7118 for (i = 0; i < group_size; i++)
7120 /* Since gaps are not supported for interleaved stores,
7121 DR_GROUP_SIZE is the exact number of stmts in the chain.
7122 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
7123 that there is no interleaving, DR_GROUP_SIZE is 1,
7124 and only one iteration of the loop will be executed. */
7125 op = vect_get_store_rhs (next_stmt_info);
7126 vec_oprnd = vect_get_vec_def_for_operand
7127 (op, next_stmt_info);
7128 dr_chain.quick_push (vec_oprnd);
7129 oprnds.quick_push (vec_oprnd);
7130 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7132 if (mask)
7133 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
7134 mask_vectype);
7137 /* We should have catched mismatched types earlier. */
7138 gcc_assert (useless_type_conversion_p (vectype,
7139 TREE_TYPE (vec_oprnd)));
7140 bool simd_lane_access_p
7141 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7142 if (simd_lane_access_p
7143 && !loop_masks
7144 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7145 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7146 && integer_zerop (DR_OFFSET (first_dr_info->dr))
7147 && integer_zerop (DR_INIT (first_dr_info->dr))
7148 && alias_sets_conflict_p (get_alias_set (aggr_type),
7149 get_alias_set (TREE_TYPE (ref_type))))
7151 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7152 dataref_offset = build_int_cst (ref_type, 0);
7154 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7155 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
7156 &dataref_ptr, &vec_offset);
7157 else
7158 dataref_ptr
7159 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
7160 simd_lane_access_p ? loop : NULL,
7161 offset, &dummy, gsi, &ptr_incr,
7162 simd_lane_access_p, NULL_TREE, bump);
7164 else
7166 /* For interleaved stores we created vectorized defs for all the
7167 defs stored in OPRNDS in the previous iteration (previous copy).
7168 DR_CHAIN is then used as an input to vect_permute_store_chain(),
7169 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7170 next copy.
7171 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7172 OPRNDS are of size 1. */
7173 for (i = 0; i < group_size; i++)
7175 op = oprnds[i];
7176 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
7177 dr_chain[i] = vec_oprnd;
7178 oprnds[i] = vec_oprnd;
7180 if (mask)
7181 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
7182 if (dataref_offset)
7183 dataref_offset
7184 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7185 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7186 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
7187 else
7188 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7189 stmt_info, bump);
7192 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7194 tree vec_array;
7196 /* Get an array into which we can store the individual vectors. */
7197 vec_array = create_vector_array (vectype, vec_num);
7199 /* Invalidate the current contents of VEC_ARRAY. This should
7200 become an RTL clobber too, which prevents the vector registers
7201 from being upward-exposed. */
7202 vect_clobber_variable (stmt_info, gsi, vec_array);
7204 /* Store the individual vectors into the array. */
7205 for (i = 0; i < vec_num; i++)
7207 vec_oprnd = dr_chain[i];
7208 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
7211 tree final_mask = NULL;
7212 if (loop_masks)
7213 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7214 vectype, j);
7215 if (vec_mask)
7216 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7217 vec_mask, gsi);
7219 gcall *call;
7220 if (final_mask)
7222 /* Emit:
7223 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7224 VEC_ARRAY). */
7225 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7226 tree alias_ptr = build_int_cst (ref_type, align);
7227 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7228 dataref_ptr, alias_ptr,
7229 final_mask, vec_array);
7231 else
7233 /* Emit:
7234 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7235 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7236 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7237 vec_array);
7238 gimple_call_set_lhs (call, data_ref);
7240 gimple_call_set_nothrow (call, true);
7241 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
7243 /* Record that VEC_ARRAY is now dead. */
7244 vect_clobber_variable (stmt_info, gsi, vec_array);
7246 else
7248 new_stmt_info = NULL;
7249 if (grouped_store)
7251 if (j == 0)
7252 result_chain.create (group_size);
7253 /* Permute. */
7254 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
7255 &result_chain);
7258 stmt_vec_info next_stmt_info = first_stmt_info;
7259 for (i = 0; i < vec_num; i++)
7261 unsigned misalign;
7262 unsigned HOST_WIDE_INT align;
7264 tree final_mask = NULL_TREE;
7265 if (loop_masks)
7266 final_mask = vect_get_loop_mask (gsi, loop_masks,
7267 vec_num * ncopies,
7268 vectype, vec_num * j + i);
7269 if (vec_mask)
7270 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7271 vec_mask, gsi);
7273 if (memory_access_type == VMAT_GATHER_SCATTER)
7275 tree scale = size_int (gs_info.scale);
7276 gcall *call;
7277 if (loop_masks)
7278 call = gimple_build_call_internal
7279 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7280 scale, vec_oprnd, final_mask);
7281 else
7282 call = gimple_build_call_internal
7283 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7284 scale, vec_oprnd);
7285 gimple_call_set_nothrow (call, true);
7286 new_stmt_info
7287 = vect_finish_stmt_generation (stmt_info, call, gsi);
7288 break;
7291 if (i > 0)
7292 /* Bump the vector pointer. */
7293 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7294 stmt_info, bump);
7296 if (slp)
7297 vec_oprnd = vec_oprnds[i];
7298 else if (grouped_store)
7299 /* For grouped stores vectorized defs are interleaved in
7300 vect_permute_store_chain(). */
7301 vec_oprnd = result_chain[i];
7303 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
7304 if (aligned_access_p (first_dr_info))
7305 misalign = 0;
7306 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7308 align = dr_alignment (vect_dr_behavior (first_dr_info));
7309 misalign = 0;
7311 else
7312 misalign = DR_MISALIGNMENT (first_dr_info);
7313 if (dataref_offset == NULL_TREE
7314 && TREE_CODE (dataref_ptr) == SSA_NAME)
7315 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7316 misalign);
7318 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7320 tree perm_mask = perm_mask_for_reverse (vectype);
7321 tree perm_dest = vect_create_destination_var
7322 (vect_get_store_rhs (stmt_info), vectype);
7323 tree new_temp = make_ssa_name (perm_dest);
7325 /* Generate the permute statement. */
7326 gimple *perm_stmt
7327 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7328 vec_oprnd, perm_mask);
7329 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7331 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7332 vec_oprnd = new_temp;
7335 /* Arguments are ready. Create the new vector stmt. */
7336 if (final_mask)
7338 align = least_bit_hwi (misalign | align);
7339 tree ptr = build_int_cst (ref_type, align);
7340 gcall *call
7341 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7342 dataref_ptr, ptr,
7343 final_mask, vec_oprnd);
7344 gimple_call_set_nothrow (call, true);
7345 new_stmt_info
7346 = vect_finish_stmt_generation (stmt_info, call, gsi);
7348 else
7350 data_ref = fold_build2 (MEM_REF, vectype,
7351 dataref_ptr,
7352 dataref_offset
7353 ? dataref_offset
7354 : build_int_cst (ref_type, 0));
7355 if (aligned_access_p (first_dr_info))
7357 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7358 TREE_TYPE (data_ref)
7359 = build_aligned_type (TREE_TYPE (data_ref),
7360 align * BITS_PER_UNIT);
7361 else
7362 TREE_TYPE (data_ref)
7363 = build_aligned_type (TREE_TYPE (data_ref),
7364 TYPE_ALIGN (elem_type));
7365 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7366 gassign *new_stmt
7367 = gimple_build_assign (data_ref, vec_oprnd);
7368 new_stmt_info
7369 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7372 if (slp)
7373 continue;
7375 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7376 if (!next_stmt_info)
7377 break;
7380 if (!slp)
7382 if (j == 0)
7383 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7384 else
7385 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7386 prev_stmt_info = new_stmt_info;
7390 oprnds.release ();
7391 result_chain.release ();
7392 vec_oprnds.release ();
7394 return true;
7397 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7398 VECTOR_CST mask. No checks are made that the target platform supports the
7399 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7400 vect_gen_perm_mask_checked. */
7402 tree
7403 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7405 tree mask_type;
7407 poly_uint64 nunits = sel.length ();
7408 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7410 mask_type = build_vector_type (ssizetype, nunits);
7411 return vec_perm_indices_to_tree (mask_type, sel);
7414 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7415 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7417 tree
7418 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7420 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7421 return vect_gen_perm_mask_any (vectype, sel);
7424 /* Given a vector variable X and Y, that was generated for the scalar
7425 STMT_INFO, generate instructions to permute the vector elements of X and Y
7426 using permutation mask MASK_VEC, insert them at *GSI and return the
7427 permuted vector variable. */
7429 static tree
7430 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
7431 gimple_stmt_iterator *gsi)
7433 tree vectype = TREE_TYPE (x);
7434 tree perm_dest, data_ref;
7435 gimple *perm_stmt;
7437 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
7438 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
7439 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7440 else
7441 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7442 data_ref = make_ssa_name (perm_dest);
7444 /* Generate the permute statement. */
7445 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7446 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7448 return data_ref;
7451 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7452 inserting them on the loops preheader edge. Returns true if we
7453 were successful in doing so (and thus STMT_INFO can be moved then),
7454 otherwise returns false. */
7456 static bool
7457 hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
7459 ssa_op_iter i;
7460 tree op;
7461 bool any = false;
7463 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7465 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7466 if (!gimple_nop_p (def_stmt)
7467 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7469 /* Make sure we don't need to recurse. While we could do
7470 so in simple cases when there are more complex use webs
7471 we don't have an easy way to preserve stmt order to fulfil
7472 dependencies within them. */
7473 tree op2;
7474 ssa_op_iter i2;
7475 if (gimple_code (def_stmt) == GIMPLE_PHI)
7476 return false;
7477 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7479 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7480 if (!gimple_nop_p (def_stmt2)
7481 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7482 return false;
7484 any = true;
7488 if (!any)
7489 return true;
7491 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7493 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7494 if (!gimple_nop_p (def_stmt)
7495 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7497 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7498 gsi_remove (&gsi, false);
7499 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7503 return true;
7506 /* vectorizable_load.
7508 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
7509 that can be vectorized.
7510 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7511 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7512 Return true if STMT_INFO is vectorizable in this way. */
7514 static bool
7515 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7516 stmt_vec_info *vec_stmt, slp_tree slp_node,
7517 slp_instance slp_node_instance,
7518 stmt_vector_for_cost *cost_vec)
7520 tree scalar_dest;
7521 tree vec_dest = NULL;
7522 tree data_ref = NULL;
7523 stmt_vec_info prev_stmt_info;
7524 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7525 struct loop *loop = NULL;
7526 struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
7527 bool nested_in_vect_loop = false;
7528 tree elem_type;
7529 tree new_temp;
7530 machine_mode mode;
7531 tree dummy;
7532 enum dr_alignment_support alignment_support_scheme;
7533 tree dataref_ptr = NULL_TREE;
7534 tree dataref_offset = NULL_TREE;
7535 gimple *ptr_incr = NULL;
7536 int ncopies;
7537 int i, j;
7538 unsigned int group_size;
7539 poly_uint64 group_gap_adj;
7540 tree msq = NULL_TREE, lsq;
7541 tree offset = NULL_TREE;
7542 tree byte_offset = NULL_TREE;
7543 tree realignment_token = NULL_TREE;
7544 gphi *phi = NULL;
7545 vec<tree> dr_chain = vNULL;
7546 bool grouped_load = false;
7547 stmt_vec_info first_stmt_info;
7548 stmt_vec_info first_stmt_info_for_drptr = NULL;
7549 bool compute_in_loop = false;
7550 struct loop *at_loop;
7551 int vec_num;
7552 bool slp = (slp_node != NULL);
7553 bool slp_perm = false;
7554 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7555 poly_uint64 vf;
7556 tree aggr_type;
7557 gather_scatter_info gs_info;
7558 vec_info *vinfo = stmt_info->vinfo;
7559 tree ref_type;
7560 enum vect_def_type mask_dt = vect_unknown_def_type;
7562 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7563 return false;
7565 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7566 && ! vec_stmt)
7567 return false;
7569 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7570 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7572 scalar_dest = gimple_assign_lhs (assign);
7573 if (TREE_CODE (scalar_dest) != SSA_NAME)
7574 return false;
7576 tree_code code = gimple_assign_rhs_code (assign);
7577 if (code != ARRAY_REF
7578 && code != BIT_FIELD_REF
7579 && code != INDIRECT_REF
7580 && code != COMPONENT_REF
7581 && code != IMAGPART_EXPR
7582 && code != REALPART_EXPR
7583 && code != MEM_REF
7584 && TREE_CODE_CLASS (code) != tcc_declaration)
7585 return false;
7587 else
7589 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7590 if (!call || !gimple_call_internal_p (call))
7591 return false;
7593 internal_fn ifn = gimple_call_internal_fn (call);
7594 if (!internal_load_fn_p (ifn))
7595 return false;
7597 scalar_dest = gimple_call_lhs (call);
7598 if (!scalar_dest)
7599 return false;
7601 if (slp_node != NULL)
7603 if (dump_enabled_p ())
7604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7605 "SLP of masked loads not supported.\n");
7606 return false;
7609 int mask_index = internal_fn_mask_index (ifn);
7610 if (mask_index >= 0)
7612 mask = gimple_call_arg (call, mask_index);
7613 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7614 &mask_vectype))
7615 return false;
7619 if (!STMT_VINFO_DATA_REF (stmt_info))
7620 return false;
7622 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7623 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7625 if (loop_vinfo)
7627 loop = LOOP_VINFO_LOOP (loop_vinfo);
7628 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
7629 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7631 else
7632 vf = 1;
7634 /* Multiple types in SLP are handled by creating the appropriate number of
7635 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7636 case of SLP. */
7637 if (slp)
7638 ncopies = 1;
7639 else
7640 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7642 gcc_assert (ncopies >= 1);
7644 /* FORNOW. This restriction should be relaxed. */
7645 if (nested_in_vect_loop && ncopies > 1)
7647 if (dump_enabled_p ())
7648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7649 "multiple types in nested loop.\n");
7650 return false;
7653 /* Invalidate assumptions made by dependence analysis when vectorization
7654 on the unrolled body effectively re-orders stmts. */
7655 if (ncopies > 1
7656 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7657 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7658 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7660 if (dump_enabled_p ())
7661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7662 "cannot perform implicit CSE when unrolling "
7663 "with negative dependence distance\n");
7664 return false;
7667 elem_type = TREE_TYPE (vectype);
7668 mode = TYPE_MODE (vectype);
7670 /* FORNOW. In some cases can vectorize even if data-type not supported
7671 (e.g. - data copies). */
7672 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7674 if (dump_enabled_p ())
7675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7676 "Aligned load, but unsupported type.\n");
7677 return false;
7680 /* Check if the load is a part of an interleaving chain. */
7681 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7683 grouped_load = true;
7684 /* FORNOW */
7685 gcc_assert (!nested_in_vect_loop);
7686 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7688 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7689 group_size = DR_GROUP_SIZE (first_stmt_info);
7691 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7692 slp_perm = true;
7694 /* Invalidate assumptions made by dependence analysis when vectorization
7695 on the unrolled body effectively re-orders stmts. */
7696 if (!PURE_SLP_STMT (stmt_info)
7697 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7698 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7699 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7701 if (dump_enabled_p ())
7702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7703 "cannot perform implicit CSE when performing "
7704 "group loads with negative dependence distance\n");
7705 return false;
7708 else
7709 group_size = 1;
7711 vect_memory_access_type memory_access_type;
7712 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
7713 &memory_access_type, &gs_info))
7714 return false;
7716 if (mask)
7718 if (memory_access_type == VMAT_CONTIGUOUS)
7720 machine_mode vec_mode = TYPE_MODE (vectype);
7721 if (!VECTOR_MODE_P (vec_mode)
7722 || !can_vec_mask_load_store_p (vec_mode,
7723 TYPE_MODE (mask_vectype), true))
7724 return false;
7726 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7727 && memory_access_type != VMAT_GATHER_SCATTER)
7729 if (dump_enabled_p ())
7730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7731 "unsupported access type for masked load.\n");
7732 return false;
7736 if (!vec_stmt) /* transformation not required. */
7738 if (!slp)
7739 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7741 if (loop_vinfo
7742 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7743 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7744 memory_access_type, &gs_info);
7746 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7747 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7748 slp_node_instance, slp_node, cost_vec);
7749 return true;
7752 if (!slp)
7753 gcc_assert (memory_access_type
7754 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7756 if (dump_enabled_p ())
7757 dump_printf_loc (MSG_NOTE, vect_location,
7758 "transform load. ncopies = %d\n", ncopies);
7760 /* Transform. */
7762 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7763 ensure_base_align (dr_info);
7765 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7767 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
7768 return true;
7771 if (memory_access_type == VMAT_INVARIANT)
7773 gcc_assert (!grouped_load && !mask && !bb_vinfo);
7774 /* If we have versioned for aliasing or the loop doesn't
7775 have any data dependencies that would preclude this,
7776 then we are sure this is a loop invariant load and
7777 thus we can insert it on the preheader edge. */
7778 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7779 && !nested_in_vect_loop
7780 && hoist_defs_of_uses (stmt_info, loop));
7781 if (hoist_p)
7783 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
7784 if (dump_enabled_p ())
7785 dump_printf_loc (MSG_NOTE, vect_location,
7786 "hoisting out of the vectorized loop: %G", stmt);
7787 scalar_dest = copy_ssa_name (scalar_dest);
7788 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
7789 gsi_insert_on_edge_immediate
7790 (loop_preheader_edge (loop),
7791 gimple_build_assign (scalar_dest, rhs));
7793 /* These copies are all equivalent, but currently the representation
7794 requires a separate STMT_VINFO_VEC_STMT for each one. */
7795 prev_stmt_info = NULL;
7796 gimple_stmt_iterator gsi2 = *gsi;
7797 gsi_next (&gsi2);
7798 for (j = 0; j < ncopies; j++)
7800 stmt_vec_info new_stmt_info;
7801 if (hoist_p)
7803 new_temp = vect_init_vector (stmt_info, scalar_dest,
7804 vectype, NULL);
7805 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
7806 new_stmt_info = vinfo->add_stmt (new_stmt);
7808 else
7810 new_temp = vect_init_vector (stmt_info, scalar_dest,
7811 vectype, &gsi2);
7812 new_stmt_info = vinfo->lookup_def (new_temp);
7814 if (slp)
7815 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7816 else if (j == 0)
7817 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7818 else
7819 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7820 prev_stmt_info = new_stmt_info;
7822 return true;
7825 if (memory_access_type == VMAT_ELEMENTWISE
7826 || memory_access_type == VMAT_STRIDED_SLP)
7828 gimple_stmt_iterator incr_gsi;
7829 bool insert_after;
7830 gimple *incr;
7831 tree offvar;
7832 tree ivstep;
7833 tree running_off;
7834 vec<constructor_elt, va_gc> *v = NULL;
7835 tree stride_base, stride_step, alias_off;
7836 /* Checked by get_load_store_type. */
7837 unsigned int const_nunits = nunits.to_constant ();
7838 unsigned HOST_WIDE_INT cst_offset = 0;
7840 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7841 gcc_assert (!nested_in_vect_loop);
7843 if (grouped_load)
7845 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7846 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7848 else
7850 first_stmt_info = stmt_info;
7851 first_dr_info = dr_info;
7853 if (slp && grouped_load)
7855 group_size = DR_GROUP_SIZE (first_stmt_info);
7856 ref_type = get_group_alias_ptr_type (first_stmt_info);
7858 else
7860 if (grouped_load)
7861 cst_offset
7862 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7863 * vect_get_place_in_interleaving_chain (stmt_info,
7864 first_stmt_info));
7865 group_size = 1;
7866 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7869 stride_base
7870 = fold_build_pointer_plus
7871 (DR_BASE_ADDRESS (first_dr_info->dr),
7872 size_binop (PLUS_EXPR,
7873 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7874 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7875 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7877 /* For a load with loop-invariant (but other than power-of-2)
7878 stride (i.e. not a grouped access) like so:
7880 for (i = 0; i < n; i += stride)
7881 ... = array[i];
7883 we generate a new induction variable and new accesses to
7884 form a new vector (or vectors, depending on ncopies):
7886 for (j = 0; ; j += VF*stride)
7887 tmp1 = array[j];
7888 tmp2 = array[j + stride];
7890 vectemp = {tmp1, tmp2, ...}
7893 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7894 build_int_cst (TREE_TYPE (stride_step), vf));
7896 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7898 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7899 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7900 create_iv (stride_base, ivstep, NULL,
7901 loop, &incr_gsi, insert_after,
7902 &offvar, NULL);
7903 incr = gsi_stmt (incr_gsi);
7904 loop_vinfo->add_stmt (incr);
7906 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7908 prev_stmt_info = NULL;
7909 running_off = offvar;
7910 alias_off = build_int_cst (ref_type, 0);
7911 int nloads = const_nunits;
7912 int lnel = 1;
7913 tree ltype = TREE_TYPE (vectype);
7914 tree lvectype = vectype;
7915 auto_vec<tree> dr_chain;
7916 if (memory_access_type == VMAT_STRIDED_SLP)
7918 if (group_size < const_nunits)
7920 /* First check if vec_init optab supports construction from
7921 vector elts directly. */
7922 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7923 machine_mode vmode;
7924 if (mode_for_vector (elmode, group_size).exists (&vmode)
7925 && VECTOR_MODE_P (vmode)
7926 && targetm.vector_mode_supported_p (vmode)
7927 && (convert_optab_handler (vec_init_optab,
7928 TYPE_MODE (vectype), vmode)
7929 != CODE_FOR_nothing))
7931 nloads = const_nunits / group_size;
7932 lnel = group_size;
7933 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7935 else
7937 /* Otherwise avoid emitting a constructor of vector elements
7938 by performing the loads using an integer type of the same
7939 size, constructing a vector of those and then
7940 re-interpreting it as the original vector type.
7941 This avoids a huge runtime penalty due to the general
7942 inability to perform store forwarding from smaller stores
7943 to a larger load. */
7944 unsigned lsize
7945 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7946 unsigned int lnunits = const_nunits / group_size;
7947 /* If we can't construct such a vector fall back to
7948 element loads of the original vector type. */
7949 if (int_mode_for_size (lsize, 0).exists (&elmode)
7950 && mode_for_vector (elmode, lnunits).exists (&vmode)
7951 && VECTOR_MODE_P (vmode)
7952 && targetm.vector_mode_supported_p (vmode)
7953 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7954 != CODE_FOR_nothing))
7956 nloads = lnunits;
7957 lnel = group_size;
7958 ltype = build_nonstandard_integer_type (lsize, 1);
7959 lvectype = build_vector_type (ltype, nloads);
7963 else
7965 nloads = 1;
7966 lnel = const_nunits;
7967 ltype = vectype;
7969 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7971 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
7972 else if (nloads == 1)
7973 ltype = vectype;
7975 if (slp)
7977 /* For SLP permutation support we need to load the whole group,
7978 not only the number of vector stmts the permutation result
7979 fits in. */
7980 if (slp_perm)
7982 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7983 variable VF. */
7984 unsigned int const_vf = vf.to_constant ();
7985 ncopies = CEIL (group_size * const_vf, const_nunits);
7986 dr_chain.create (ncopies);
7988 else
7989 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7991 unsigned int group_el = 0;
7992 unsigned HOST_WIDE_INT
7993 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7994 for (j = 0; j < ncopies; j++)
7996 if (nloads > 1)
7997 vec_alloc (v, nloads);
7998 stmt_vec_info new_stmt_info = NULL;
7999 for (i = 0; i < nloads; i++)
8001 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8002 group_el * elsz + cst_offset);
8003 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8004 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8005 gassign *new_stmt
8006 = gimple_build_assign (make_ssa_name (ltype), data_ref);
8007 new_stmt_info
8008 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8009 if (nloads > 1)
8010 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8011 gimple_assign_lhs (new_stmt));
8013 group_el += lnel;
8014 if (! slp
8015 || group_el == group_size)
8017 tree newoff = copy_ssa_name (running_off);
8018 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8019 running_off, stride_step);
8020 vect_finish_stmt_generation (stmt_info, incr, gsi);
8022 running_off = newoff;
8023 group_el = 0;
8026 if (nloads > 1)
8028 tree vec_inv = build_constructor (lvectype, v);
8029 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
8030 new_stmt_info = vinfo->lookup_def (new_temp);
8031 if (lvectype != vectype)
8033 gassign *new_stmt
8034 = gimple_build_assign (make_ssa_name (vectype),
8035 VIEW_CONVERT_EXPR,
8036 build1 (VIEW_CONVERT_EXPR,
8037 vectype, new_temp));
8038 new_stmt_info
8039 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8043 if (slp)
8045 if (slp_perm)
8046 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
8047 else
8048 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8050 else
8052 if (j == 0)
8053 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8054 else
8055 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8056 prev_stmt_info = new_stmt_info;
8059 if (slp_perm)
8061 unsigned n_perms;
8062 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8063 slp_node_instance, false, &n_perms);
8065 return true;
8068 if (memory_access_type == VMAT_GATHER_SCATTER
8069 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8070 grouped_load = false;
8072 if (grouped_load)
8074 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8075 group_size = DR_GROUP_SIZE (first_stmt_info);
8076 /* For SLP vectorization we directly vectorize a subchain
8077 without permutation. */
8078 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8079 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8080 /* For BB vectorization always use the first stmt to base
8081 the data ref pointer on. */
8082 if (bb_vinfo)
8083 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8085 /* Check if the chain of loads is already vectorized. */
8086 if (STMT_VINFO_VEC_STMT (first_stmt_info)
8087 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8088 ??? But we can only do so if there is exactly one
8089 as we have no way to get at the rest. Leave the CSE
8090 opportunity alone.
8091 ??? With the group load eventually participating
8092 in multiple different permutations (having multiple
8093 slp nodes which refer to the same group) the CSE
8094 is even wrong code. See PR56270. */
8095 && !slp)
8097 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8098 return true;
8100 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8101 group_gap_adj = 0;
8103 /* VEC_NUM is the number of vect stmts to be created for this group. */
8104 if (slp)
8106 grouped_load = false;
8107 /* If an SLP permutation is from N elements to N elements,
8108 and if one vector holds a whole number of N, we can load
8109 the inputs to the permutation in the same way as an
8110 unpermuted sequence. In other cases we need to load the
8111 whole group, not only the number of vector stmts the
8112 permutation result fits in. */
8113 if (slp_perm
8114 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
8115 || !multiple_p (nunits, group_size)))
8117 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8118 variable VF; see vect_transform_slp_perm_load. */
8119 unsigned int const_vf = vf.to_constant ();
8120 unsigned int const_nunits = nunits.to_constant ();
8121 vec_num = CEIL (group_size * const_vf, const_nunits);
8122 group_gap_adj = vf * group_size - nunits * vec_num;
8124 else
8126 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8127 group_gap_adj
8128 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
8131 else
8132 vec_num = group_size;
8134 ref_type = get_group_alias_ptr_type (first_stmt_info);
8136 else
8138 first_stmt_info = stmt_info;
8139 first_dr_info = dr_info;
8140 group_size = vec_num = 1;
8141 group_gap_adj = 0;
8142 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8145 alignment_support_scheme
8146 = vect_supportable_dr_alignment (first_dr_info, false);
8147 gcc_assert (alignment_support_scheme);
8148 vec_loop_masks *loop_masks
8149 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8150 ? &LOOP_VINFO_MASKS (loop_vinfo)
8151 : NULL);
8152 /* Targets with store-lane instructions must not require explicit
8153 realignment. vect_supportable_dr_alignment always returns either
8154 dr_aligned or dr_unaligned_supported for masked operations. */
8155 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8156 && !mask
8157 && !loop_masks)
8158 || alignment_support_scheme == dr_aligned
8159 || alignment_support_scheme == dr_unaligned_supported);
8161 /* In case the vectorization factor (VF) is bigger than the number
8162 of elements that we can fit in a vectype (nunits), we have to generate
8163 more than one vector stmt - i.e - we need to "unroll" the
8164 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8165 from one copy of the vector stmt to the next, in the field
8166 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8167 stages to find the correct vector defs to be used when vectorizing
8168 stmts that use the defs of the current stmt. The example below
8169 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8170 need to create 4 vectorized stmts):
8172 before vectorization:
8173 RELATED_STMT VEC_STMT
8174 S1: x = memref - -
8175 S2: z = x + 1 - -
8177 step 1: vectorize stmt S1:
8178 We first create the vector stmt VS1_0, and, as usual, record a
8179 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8180 Next, we create the vector stmt VS1_1, and record a pointer to
8181 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8182 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8183 stmts and pointers:
8184 RELATED_STMT VEC_STMT
8185 VS1_0: vx0 = memref0 VS1_1 -
8186 VS1_1: vx1 = memref1 VS1_2 -
8187 VS1_2: vx2 = memref2 VS1_3 -
8188 VS1_3: vx3 = memref3 - -
8189 S1: x = load - VS1_0
8190 S2: z = x + 1 - -
8192 See in documentation in vect_get_vec_def_for_stmt_copy for how the
8193 information we recorded in RELATED_STMT field is used to vectorize
8194 stmt S2. */
8196 /* In case of interleaving (non-unit grouped access):
8198 S1: x2 = &base + 2
8199 S2: x0 = &base
8200 S3: x1 = &base + 1
8201 S4: x3 = &base + 3
8203 Vectorized loads are created in the order of memory accesses
8204 starting from the access of the first stmt of the chain:
8206 VS1: vx0 = &base
8207 VS2: vx1 = &base + vec_size*1
8208 VS3: vx3 = &base + vec_size*2
8209 VS4: vx4 = &base + vec_size*3
8211 Then permutation statements are generated:
8213 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8214 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8217 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8218 (the order of the data-refs in the output of vect_permute_load_chain
8219 corresponds to the order of scalar stmts in the interleaving chain - see
8220 the documentation of vect_permute_load_chain()).
8221 The generation of permutation stmts and recording them in
8222 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8224 In case of both multiple types and interleaving, the vector loads and
8225 permutation stmts above are created for every copy. The result vector
8226 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8227 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8229 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8230 on a target that supports unaligned accesses (dr_unaligned_supported)
8231 we generate the following code:
8232 p = initial_addr;
8233 indx = 0;
8234 loop {
8235 p = p + indx * vectype_size;
8236 vec_dest = *(p);
8237 indx = indx + 1;
8240 Otherwise, the data reference is potentially unaligned on a target that
8241 does not support unaligned accesses (dr_explicit_realign_optimized) -
8242 then generate the following code, in which the data in each iteration is
8243 obtained by two vector loads, one from the previous iteration, and one
8244 from the current iteration:
8245 p1 = initial_addr;
8246 msq_init = *(floor(p1))
8247 p2 = initial_addr + VS - 1;
8248 realignment_token = call target_builtin;
8249 indx = 0;
8250 loop {
8251 p2 = p2 + indx * vectype_size
8252 lsq = *(floor(p2))
8253 vec_dest = realign_load (msq, lsq, realignment_token)
8254 indx = indx + 1;
8255 msq = lsq;
8256 } */
8258 /* If the misalignment remains the same throughout the execution of the
8259 loop, we can create the init_addr and permutation mask at the loop
8260 preheader. Otherwise, it needs to be created inside the loop.
8261 This can only occur when vectorizing memory accesses in the inner-loop
8262 nested within an outer-loop that is being vectorized. */
8264 if (nested_in_vect_loop
8265 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
8266 GET_MODE_SIZE (TYPE_MODE (vectype))))
8268 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8269 compute_in_loop = true;
8272 if ((alignment_support_scheme == dr_explicit_realign_optimized
8273 || alignment_support_scheme == dr_explicit_realign)
8274 && !compute_in_loop)
8276 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
8277 alignment_support_scheme, NULL_TREE,
8278 &at_loop);
8279 if (alignment_support_scheme == dr_explicit_realign_optimized)
8281 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8282 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8283 size_one_node);
8286 else
8287 at_loop = loop;
8289 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8290 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8292 tree bump;
8293 tree vec_offset = NULL_TREE;
8294 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8296 aggr_type = NULL_TREE;
8297 bump = NULL_TREE;
8299 else if (memory_access_type == VMAT_GATHER_SCATTER)
8301 aggr_type = elem_type;
8302 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8303 &bump, &vec_offset);
8305 else
8307 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8308 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8309 else
8310 aggr_type = vectype;
8311 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8312 memory_access_type);
8315 tree vec_mask = NULL_TREE;
8316 prev_stmt_info = NULL;
8317 poly_uint64 group_elt = 0;
8318 for (j = 0; j < ncopies; j++)
8320 stmt_vec_info new_stmt_info = NULL;
8321 /* 1. Create the vector or array pointer update chain. */
8322 if (j == 0)
8324 bool simd_lane_access_p
8325 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8326 if (simd_lane_access_p
8327 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8328 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8329 && integer_zerop (DR_OFFSET (first_dr_info->dr))
8330 && integer_zerop (DR_INIT (first_dr_info->dr))
8331 && alias_sets_conflict_p (get_alias_set (aggr_type),
8332 get_alias_set (TREE_TYPE (ref_type)))
8333 && (alignment_support_scheme == dr_aligned
8334 || alignment_support_scheme == dr_unaligned_supported))
8336 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8337 dataref_offset = build_int_cst (ref_type, 0);
8339 else if (first_stmt_info_for_drptr
8340 && first_stmt_info != first_stmt_info_for_drptr)
8342 dataref_ptr
8343 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8344 aggr_type, at_loop, offset, &dummy,
8345 gsi, &ptr_incr, simd_lane_access_p,
8346 byte_offset, bump);
8347 /* Adjust the pointer by the difference to first_stmt. */
8348 data_reference_p ptrdr
8349 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
8350 tree diff
8351 = fold_convert (sizetype,
8352 size_binop (MINUS_EXPR,
8353 DR_INIT (first_dr_info->dr),
8354 DR_INIT (ptrdr)));
8355 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8356 stmt_info, diff);
8358 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8359 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8360 &dataref_ptr, &vec_offset);
8361 else
8362 dataref_ptr
8363 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
8364 offset, &dummy, gsi, &ptr_incr,
8365 simd_lane_access_p,
8366 byte_offset, bump);
8367 if (mask)
8368 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8369 mask_vectype);
8371 else
8373 if (dataref_offset)
8374 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8375 bump);
8376 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8377 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8378 else
8379 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8380 stmt_info, bump);
8381 if (mask)
8382 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8385 if (grouped_load || slp_perm)
8386 dr_chain.create (vec_num);
8388 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8390 tree vec_array;
8392 vec_array = create_vector_array (vectype, vec_num);
8394 tree final_mask = NULL_TREE;
8395 if (loop_masks)
8396 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8397 vectype, j);
8398 if (vec_mask)
8399 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8400 vec_mask, gsi);
8402 gcall *call;
8403 if (final_mask)
8405 /* Emit:
8406 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8407 VEC_MASK). */
8408 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8409 tree alias_ptr = build_int_cst (ref_type, align);
8410 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8411 dataref_ptr, alias_ptr,
8412 final_mask);
8414 else
8416 /* Emit:
8417 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8418 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8419 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8421 gimple_call_set_lhs (call, vec_array);
8422 gimple_call_set_nothrow (call, true);
8423 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8425 /* Extract each vector into an SSA_NAME. */
8426 for (i = 0; i < vec_num; i++)
8428 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
8429 vec_array, i);
8430 dr_chain.quick_push (new_temp);
8433 /* Record the mapping between SSA_NAMEs and statements. */
8434 vect_record_grouped_load_vectors (stmt_info, dr_chain);
8436 /* Record that VEC_ARRAY is now dead. */
8437 vect_clobber_variable (stmt_info, gsi, vec_array);
8439 else
8441 for (i = 0; i < vec_num; i++)
8443 tree final_mask = NULL_TREE;
8444 if (loop_masks
8445 && memory_access_type != VMAT_INVARIANT)
8446 final_mask = vect_get_loop_mask (gsi, loop_masks,
8447 vec_num * ncopies,
8448 vectype, vec_num * j + i);
8449 if (vec_mask)
8450 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8451 vec_mask, gsi);
8453 if (i > 0)
8454 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8455 stmt_info, bump);
8457 /* 2. Create the vector-load in the loop. */
8458 gimple *new_stmt = NULL;
8459 switch (alignment_support_scheme)
8461 case dr_aligned:
8462 case dr_unaligned_supported:
8464 unsigned int misalign;
8465 unsigned HOST_WIDE_INT align;
8467 if (memory_access_type == VMAT_GATHER_SCATTER)
8469 tree scale = size_int (gs_info.scale);
8470 gcall *call;
8471 if (loop_masks)
8472 call = gimple_build_call_internal
8473 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8474 vec_offset, scale, final_mask);
8475 else
8476 call = gimple_build_call_internal
8477 (IFN_GATHER_LOAD, 3, dataref_ptr,
8478 vec_offset, scale);
8479 gimple_call_set_nothrow (call, true);
8480 new_stmt = call;
8481 data_ref = NULL_TREE;
8482 break;
8485 align =
8486 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8487 if (alignment_support_scheme == dr_aligned)
8489 gcc_assert (aligned_access_p (first_dr_info));
8490 misalign = 0;
8492 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8494 align = dr_alignment
8495 (vect_dr_behavior (first_dr_info));
8496 misalign = 0;
8498 else
8499 misalign = DR_MISALIGNMENT (first_dr_info);
8500 if (dataref_offset == NULL_TREE
8501 && TREE_CODE (dataref_ptr) == SSA_NAME)
8502 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8503 align, misalign);
8505 if (final_mask)
8507 align = least_bit_hwi (misalign | align);
8508 tree ptr = build_int_cst (ref_type, align);
8509 gcall *call
8510 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8511 dataref_ptr, ptr,
8512 final_mask);
8513 gimple_call_set_nothrow (call, true);
8514 new_stmt = call;
8515 data_ref = NULL_TREE;
8517 else
8519 data_ref
8520 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8521 dataref_offset
8522 ? dataref_offset
8523 : build_int_cst (ref_type, 0));
8524 if (alignment_support_scheme == dr_aligned)
8526 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8527 TREE_TYPE (data_ref)
8528 = build_aligned_type (TREE_TYPE (data_ref),
8529 align * BITS_PER_UNIT);
8530 else
8531 TREE_TYPE (data_ref)
8532 = build_aligned_type (TREE_TYPE (data_ref),
8533 TYPE_ALIGN (elem_type));
8535 break;
8537 case dr_explicit_realign:
8539 tree ptr, bump;
8541 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8543 if (compute_in_loop)
8544 msq = vect_setup_realignment (first_stmt_info, gsi,
8545 &realignment_token,
8546 dr_explicit_realign,
8547 dataref_ptr, NULL);
8549 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8550 ptr = copy_ssa_name (dataref_ptr);
8551 else
8552 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8553 // For explicit realign the target alignment should be
8554 // known at compile time.
8555 unsigned HOST_WIDE_INT align =
8556 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8557 new_stmt = gimple_build_assign
8558 (ptr, BIT_AND_EXPR, dataref_ptr,
8559 build_int_cst
8560 (TREE_TYPE (dataref_ptr),
8561 -(HOST_WIDE_INT) align));
8562 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8563 data_ref
8564 = build2 (MEM_REF, vectype, ptr,
8565 build_int_cst (ref_type, 0));
8566 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8567 vec_dest = vect_create_destination_var (scalar_dest,
8568 vectype);
8569 new_stmt = gimple_build_assign (vec_dest, data_ref);
8570 new_temp = make_ssa_name (vec_dest, new_stmt);
8571 gimple_assign_set_lhs (new_stmt, new_temp);
8572 gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
8573 gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
8574 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8575 msq = new_temp;
8577 bump = size_binop (MULT_EXPR, vs,
8578 TYPE_SIZE_UNIT (elem_type));
8579 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8580 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
8581 stmt_info, bump);
8582 new_stmt = gimple_build_assign
8583 (NULL_TREE, BIT_AND_EXPR, ptr,
8584 build_int_cst
8585 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8586 ptr = copy_ssa_name (ptr, new_stmt);
8587 gimple_assign_set_lhs (new_stmt, ptr);
8588 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8589 data_ref
8590 = build2 (MEM_REF, vectype, ptr,
8591 build_int_cst (ref_type, 0));
8592 break;
8594 case dr_explicit_realign_optimized:
8596 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8597 new_temp = copy_ssa_name (dataref_ptr);
8598 else
8599 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8600 // We should only be doing this if we know the target
8601 // alignment at compile time.
8602 unsigned HOST_WIDE_INT align =
8603 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8604 new_stmt = gimple_build_assign
8605 (new_temp, BIT_AND_EXPR, dataref_ptr,
8606 build_int_cst (TREE_TYPE (dataref_ptr),
8607 -(HOST_WIDE_INT) align));
8608 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8609 data_ref
8610 = build2 (MEM_REF, vectype, new_temp,
8611 build_int_cst (ref_type, 0));
8612 break;
8614 default:
8615 gcc_unreachable ();
8617 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8618 /* DATA_REF is null if we've already built the statement. */
8619 if (data_ref)
8621 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8622 new_stmt = gimple_build_assign (vec_dest, data_ref);
8624 new_temp = make_ssa_name (vec_dest, new_stmt);
8625 gimple_set_lhs (new_stmt, new_temp);
8626 new_stmt_info
8627 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8629 /* 3. Handle explicit realignment if necessary/supported.
8630 Create in loop:
8631 vec_dest = realign_load (msq, lsq, realignment_token) */
8632 if (alignment_support_scheme == dr_explicit_realign_optimized
8633 || alignment_support_scheme == dr_explicit_realign)
8635 lsq = gimple_assign_lhs (new_stmt);
8636 if (!realignment_token)
8637 realignment_token = dataref_ptr;
8638 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8639 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8640 msq, lsq, realignment_token);
8641 new_temp = make_ssa_name (vec_dest, new_stmt);
8642 gimple_assign_set_lhs (new_stmt, new_temp);
8643 new_stmt_info
8644 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8646 if (alignment_support_scheme == dr_explicit_realign_optimized)
8648 gcc_assert (phi);
8649 if (i == vec_num - 1 && j == ncopies - 1)
8650 add_phi_arg (phi, lsq,
8651 loop_latch_edge (containing_loop),
8652 UNKNOWN_LOCATION);
8653 msq = lsq;
8657 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8659 tree perm_mask = perm_mask_for_reverse (vectype);
8660 new_temp = permute_vec_elements (new_temp, new_temp,
8661 perm_mask, stmt_info, gsi);
8662 new_stmt_info = vinfo->lookup_def (new_temp);
8665 /* Collect vector loads and later create their permutation in
8666 vect_transform_grouped_load (). */
8667 if (grouped_load || slp_perm)
8668 dr_chain.quick_push (new_temp);
8670 /* Store vector loads in the corresponding SLP_NODE. */
8671 if (slp && !slp_perm)
8672 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8674 /* With SLP permutation we load the gaps as well, without
8675 we need to skip the gaps after we manage to fully load
8676 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8677 group_elt += nunits;
8678 if (maybe_ne (group_gap_adj, 0U)
8679 && !slp_perm
8680 && known_eq (group_elt, group_size - group_gap_adj))
8682 poly_wide_int bump_val
8683 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8684 * group_gap_adj);
8685 tree bump = wide_int_to_tree (sizetype, bump_val);
8686 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8687 stmt_info, bump);
8688 group_elt = 0;
8691 /* Bump the vector pointer to account for a gap or for excess
8692 elements loaded for a permuted SLP load. */
8693 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8695 poly_wide_int bump_val
8696 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8697 * group_gap_adj);
8698 tree bump = wide_int_to_tree (sizetype, bump_val);
8699 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8700 stmt_info, bump);
8704 if (slp && !slp_perm)
8705 continue;
8707 if (slp_perm)
8709 unsigned n_perms;
8710 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8711 slp_node_instance, false,
8712 &n_perms))
8714 dr_chain.release ();
8715 return false;
8718 else
8720 if (grouped_load)
8722 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8723 vect_transform_grouped_load (stmt_info, dr_chain,
8724 group_size, gsi);
8725 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8727 else
8729 if (j == 0)
8730 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8731 else
8732 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8733 prev_stmt_info = new_stmt_info;
8736 dr_chain.release ();
8739 return true;
8742 /* Function vect_is_simple_cond.
8744 Input:
8745 LOOP - the loop that is being vectorized.
8746 COND - Condition that is checked for simple use.
8748 Output:
8749 *COMP_VECTYPE - the vector type for the comparison.
8750 *DTS - The def types for the arguments of the comparison
8752 Returns whether a COND can be vectorized. Checks whether
8753 condition operands are supportable using vec_is_simple_use. */
8755 static bool
8756 vect_is_simple_cond (tree cond, vec_info *vinfo,
8757 tree *comp_vectype, enum vect_def_type *dts,
8758 tree vectype)
8760 tree lhs, rhs;
8761 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8763 /* Mask case. */
8764 if (TREE_CODE (cond) == SSA_NAME
8765 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8767 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8768 || !*comp_vectype
8769 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8770 return false;
8771 return true;
8774 if (!COMPARISON_CLASS_P (cond))
8775 return false;
8777 lhs = TREE_OPERAND (cond, 0);
8778 rhs = TREE_OPERAND (cond, 1);
8780 if (TREE_CODE (lhs) == SSA_NAME)
8782 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8783 return false;
8785 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8786 || TREE_CODE (lhs) == FIXED_CST)
8787 dts[0] = vect_constant_def;
8788 else
8789 return false;
8791 if (TREE_CODE (rhs) == SSA_NAME)
8793 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8794 return false;
8796 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8797 || TREE_CODE (rhs) == FIXED_CST)
8798 dts[1] = vect_constant_def;
8799 else
8800 return false;
8802 if (vectype1 && vectype2
8803 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8804 TYPE_VECTOR_SUBPARTS (vectype2)))
8805 return false;
8807 *comp_vectype = vectype1 ? vectype1 : vectype2;
8808 /* Invariant comparison. */
8809 if (! *comp_vectype && vectype)
8811 tree scalar_type = TREE_TYPE (lhs);
8812 /* If we can widen the comparison to match vectype do so. */
8813 if (INTEGRAL_TYPE_P (scalar_type)
8814 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8815 TYPE_SIZE (TREE_TYPE (vectype))))
8816 scalar_type = build_nonstandard_integer_type
8817 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8818 TYPE_UNSIGNED (scalar_type));
8819 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8822 return true;
8825 /* vectorizable_condition.
8827 Check if STMT_INFO is conditional modify expression that can be vectorized.
8828 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8829 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8830 at GSI.
8832 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
8834 Return true if STMT_INFO is vectorizable in this way. */
8836 bool
8837 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8838 stmt_vec_info *vec_stmt, bool for_reduction,
8839 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
8841 vec_info *vinfo = stmt_info->vinfo;
8842 tree scalar_dest = NULL_TREE;
8843 tree vec_dest = NULL_TREE;
8844 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8845 tree then_clause, else_clause;
8846 tree comp_vectype = NULL_TREE;
8847 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8848 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8849 tree vec_compare;
8850 tree new_temp;
8851 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8852 enum vect_def_type dts[4]
8853 = {vect_unknown_def_type, vect_unknown_def_type,
8854 vect_unknown_def_type, vect_unknown_def_type};
8855 int ndts = 4;
8856 int ncopies;
8857 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8858 stmt_vec_info prev_stmt_info = NULL;
8859 int i, j;
8860 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8861 vec<tree> vec_oprnds0 = vNULL;
8862 vec<tree> vec_oprnds1 = vNULL;
8863 vec<tree> vec_oprnds2 = vNULL;
8864 vec<tree> vec_oprnds3 = vNULL;
8865 tree vec_cmp_type;
8866 bool masked = false;
8868 if (for_reduction && STMT_SLP_TYPE (stmt_info))
8869 return false;
8871 vect_reduction_type reduction_type
8872 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8873 if (reduction_type == TREE_CODE_REDUCTION)
8875 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8876 return false;
8878 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8879 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8880 && for_reduction))
8881 return false;
8883 /* FORNOW: not yet supported. */
8884 if (STMT_VINFO_LIVE_P (stmt_info))
8886 if (dump_enabled_p ())
8887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8888 "value used after loop.\n");
8889 return false;
8893 /* Is vectorizable conditional operation? */
8894 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
8895 if (!stmt)
8896 return false;
8898 code = gimple_assign_rhs_code (stmt);
8900 if (code != COND_EXPR)
8901 return false;
8903 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8904 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8906 if (slp_node)
8907 ncopies = 1;
8908 else
8909 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8911 gcc_assert (ncopies >= 1);
8912 if (for_reduction && ncopies > 1)
8913 return false; /* FORNOW */
8915 cond_expr = gimple_assign_rhs1 (stmt);
8916 then_clause = gimple_assign_rhs2 (stmt);
8917 else_clause = gimple_assign_rhs3 (stmt);
8919 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8920 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8921 || !comp_vectype)
8922 return false;
8924 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8925 return false;
8926 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8927 return false;
8929 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8930 return false;
8932 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8933 return false;
8935 masked = !COMPARISON_CLASS_P (cond_expr);
8936 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8938 if (vec_cmp_type == NULL_TREE)
8939 return false;
8941 cond_code = TREE_CODE (cond_expr);
8942 if (!masked)
8944 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8945 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8948 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8950 /* Boolean values may have another representation in vectors
8951 and therefore we prefer bit operations over comparison for
8952 them (which also works for scalar masks). We store opcodes
8953 to use in bitop1 and bitop2. Statement is vectorized as
8954 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8955 depending on bitop1 and bitop2 arity. */
8956 switch (cond_code)
8958 case GT_EXPR:
8959 bitop1 = BIT_NOT_EXPR;
8960 bitop2 = BIT_AND_EXPR;
8961 break;
8962 case GE_EXPR:
8963 bitop1 = BIT_NOT_EXPR;
8964 bitop2 = BIT_IOR_EXPR;
8965 break;
8966 case LT_EXPR:
8967 bitop1 = BIT_NOT_EXPR;
8968 bitop2 = BIT_AND_EXPR;
8969 std::swap (cond_expr0, cond_expr1);
8970 break;
8971 case LE_EXPR:
8972 bitop1 = BIT_NOT_EXPR;
8973 bitop2 = BIT_IOR_EXPR;
8974 std::swap (cond_expr0, cond_expr1);
8975 break;
8976 case NE_EXPR:
8977 bitop1 = BIT_XOR_EXPR;
8978 break;
8979 case EQ_EXPR:
8980 bitop1 = BIT_XOR_EXPR;
8981 bitop2 = BIT_NOT_EXPR;
8982 break;
8983 default:
8984 return false;
8986 cond_code = SSA_NAME;
8989 if (!vec_stmt)
8991 if (bitop1 != NOP_EXPR)
8993 machine_mode mode = TYPE_MODE (comp_vectype);
8994 optab optab;
8996 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8997 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8998 return false;
9000 if (bitop2 != NOP_EXPR)
9002 optab = optab_for_tree_code (bitop2, comp_vectype,
9003 optab_default);
9004 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9005 return false;
9008 if (expand_vec_cond_expr_p (vectype, comp_vectype,
9009 cond_code))
9011 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
9012 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
9013 cost_vec);
9014 return true;
9016 return false;
9019 /* Transform. */
9021 if (!slp_node)
9023 vec_oprnds0.create (1);
9024 vec_oprnds1.create (1);
9025 vec_oprnds2.create (1);
9026 vec_oprnds3.create (1);
9029 /* Handle def. */
9030 scalar_dest = gimple_assign_lhs (stmt);
9031 if (reduction_type != EXTRACT_LAST_REDUCTION)
9032 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9034 /* Handle cond expr. */
9035 for (j = 0; j < ncopies; j++)
9037 stmt_vec_info new_stmt_info = NULL;
9038 if (j == 0)
9040 if (slp_node)
9042 auto_vec<tree, 4> ops;
9043 auto_vec<vec<tree>, 4> vec_defs;
9045 if (masked)
9046 ops.safe_push (cond_expr);
9047 else
9049 ops.safe_push (cond_expr0);
9050 ops.safe_push (cond_expr1);
9052 ops.safe_push (then_clause);
9053 ops.safe_push (else_clause);
9054 vect_get_slp_defs (ops, slp_node, &vec_defs);
9055 vec_oprnds3 = vec_defs.pop ();
9056 vec_oprnds2 = vec_defs.pop ();
9057 if (!masked)
9058 vec_oprnds1 = vec_defs.pop ();
9059 vec_oprnds0 = vec_defs.pop ();
9061 else
9063 if (masked)
9065 vec_cond_lhs
9066 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
9067 comp_vectype);
9069 else
9071 vec_cond_lhs
9072 = vect_get_vec_def_for_operand (cond_expr0,
9073 stmt_info, comp_vectype);
9074 vec_cond_rhs
9075 = vect_get_vec_def_for_operand (cond_expr1,
9076 stmt_info, comp_vectype);
9078 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
9079 stmt_info);
9080 if (reduction_type != EXTRACT_LAST_REDUCTION)
9081 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
9082 stmt_info);
9085 else
9087 vec_cond_lhs
9088 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
9089 if (!masked)
9090 vec_cond_rhs
9091 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
9093 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
9094 vec_oprnds2.pop ());
9095 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
9096 vec_oprnds3.pop ());
9099 if (!slp_node)
9101 vec_oprnds0.quick_push (vec_cond_lhs);
9102 if (!masked)
9103 vec_oprnds1.quick_push (vec_cond_rhs);
9104 vec_oprnds2.quick_push (vec_then_clause);
9105 vec_oprnds3.quick_push (vec_else_clause);
9108 /* Arguments are ready. Create the new vector stmt. */
9109 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
9111 vec_then_clause = vec_oprnds2[i];
9112 vec_else_clause = vec_oprnds3[i];
9114 if (masked)
9115 vec_compare = vec_cond_lhs;
9116 else
9118 vec_cond_rhs = vec_oprnds1[i];
9119 if (bitop1 == NOP_EXPR)
9120 vec_compare = build2 (cond_code, vec_cmp_type,
9121 vec_cond_lhs, vec_cond_rhs);
9122 else
9124 new_temp = make_ssa_name (vec_cmp_type);
9125 gassign *new_stmt;
9126 if (bitop1 == BIT_NOT_EXPR)
9127 new_stmt = gimple_build_assign (new_temp, bitop1,
9128 vec_cond_rhs);
9129 else
9130 new_stmt
9131 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9132 vec_cond_rhs);
9133 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9134 if (bitop2 == NOP_EXPR)
9135 vec_compare = new_temp;
9136 else if (bitop2 == BIT_NOT_EXPR)
9138 /* Instead of doing ~x ? y : z do x ? z : y. */
9139 vec_compare = new_temp;
9140 std::swap (vec_then_clause, vec_else_clause);
9142 else
9144 vec_compare = make_ssa_name (vec_cmp_type);
9145 new_stmt
9146 = gimple_build_assign (vec_compare, bitop2,
9147 vec_cond_lhs, new_temp);
9148 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9152 if (reduction_type == EXTRACT_LAST_REDUCTION)
9154 if (!is_gimple_val (vec_compare))
9156 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9157 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9158 vec_compare);
9159 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9160 vec_compare = vec_compare_name;
9162 gcall *new_stmt = gimple_build_call_internal
9163 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9164 vec_then_clause);
9165 gimple_call_set_lhs (new_stmt, scalar_dest);
9166 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9167 if (stmt_info->stmt == gsi_stmt (*gsi))
9168 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
9169 else
9171 /* In this case we're moving the definition to later in the
9172 block. That doesn't matter because the only uses of the
9173 lhs are in phi statements. */
9174 gimple_stmt_iterator old_gsi
9175 = gsi_for_stmt (stmt_info->stmt);
9176 gsi_remove (&old_gsi, true);
9177 new_stmt_info
9178 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9181 else
9183 new_temp = make_ssa_name (vec_dest);
9184 gassign *new_stmt
9185 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9186 vec_then_clause, vec_else_clause);
9187 new_stmt_info
9188 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9190 if (slp_node)
9191 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9194 if (slp_node)
9195 continue;
9197 if (j == 0)
9198 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9199 else
9200 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9202 prev_stmt_info = new_stmt_info;
9205 vec_oprnds0.release ();
9206 vec_oprnds1.release ();
9207 vec_oprnds2.release ();
9208 vec_oprnds3.release ();
9210 return true;
9213 /* vectorizable_comparison.
9215 Check if STMT_INFO is comparison expression that can be vectorized.
9216 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9217 comparison, put it in VEC_STMT, and insert it at GSI.
9219 Return true if STMT_INFO is vectorizable in this way. */
9221 static bool
9222 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9223 stmt_vec_info *vec_stmt,
9224 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9226 vec_info *vinfo = stmt_info->vinfo;
9227 tree lhs, rhs1, rhs2;
9228 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9229 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9230 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9231 tree new_temp;
9232 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9233 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9234 int ndts = 2;
9235 poly_uint64 nunits;
9236 int ncopies;
9237 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9238 stmt_vec_info prev_stmt_info = NULL;
9239 int i, j;
9240 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9241 vec<tree> vec_oprnds0 = vNULL;
9242 vec<tree> vec_oprnds1 = vNULL;
9243 tree mask_type;
9244 tree mask;
9246 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9247 return false;
9249 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9250 return false;
9252 mask_type = vectype;
9253 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9255 if (slp_node)
9256 ncopies = 1;
9257 else
9258 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9260 gcc_assert (ncopies >= 1);
9261 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9262 return false;
9264 if (STMT_VINFO_LIVE_P (stmt_info))
9266 if (dump_enabled_p ())
9267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9268 "value used after loop.\n");
9269 return false;
9272 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9273 if (!stmt)
9274 return false;
9276 code = gimple_assign_rhs_code (stmt);
9278 if (TREE_CODE_CLASS (code) != tcc_comparison)
9279 return false;
9281 rhs1 = gimple_assign_rhs1 (stmt);
9282 rhs2 = gimple_assign_rhs2 (stmt);
9284 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9285 return false;
9287 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9288 return false;
9290 if (vectype1 && vectype2
9291 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9292 TYPE_VECTOR_SUBPARTS (vectype2)))
9293 return false;
9295 vectype = vectype1 ? vectype1 : vectype2;
9297 /* Invariant comparison. */
9298 if (!vectype)
9300 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9301 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9302 return false;
9304 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9305 return false;
9307 /* Can't compare mask and non-mask types. */
9308 if (vectype1 && vectype2
9309 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9310 return false;
9312 /* Boolean values may have another representation in vectors
9313 and therefore we prefer bit operations over comparison for
9314 them (which also works for scalar masks). We store opcodes
9315 to use in bitop1 and bitop2. Statement is vectorized as
9316 BITOP2 (rhs1 BITOP1 rhs2) or
9317 rhs1 BITOP2 (BITOP1 rhs2)
9318 depending on bitop1 and bitop2 arity. */
9319 bool swap_p = false;
9320 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9322 if (code == GT_EXPR)
9324 bitop1 = BIT_NOT_EXPR;
9325 bitop2 = BIT_AND_EXPR;
9327 else if (code == GE_EXPR)
9329 bitop1 = BIT_NOT_EXPR;
9330 bitop2 = BIT_IOR_EXPR;
9332 else if (code == LT_EXPR)
9334 bitop1 = BIT_NOT_EXPR;
9335 bitop2 = BIT_AND_EXPR;
9336 swap_p = true;
9338 else if (code == LE_EXPR)
9340 bitop1 = BIT_NOT_EXPR;
9341 bitop2 = BIT_IOR_EXPR;
9342 swap_p = true;
9344 else
9346 bitop1 = BIT_XOR_EXPR;
9347 if (code == EQ_EXPR)
9348 bitop2 = BIT_NOT_EXPR;
9352 if (!vec_stmt)
9354 if (bitop1 == NOP_EXPR)
9356 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9357 return false;
9359 else
9361 machine_mode mode = TYPE_MODE (vectype);
9362 optab optab;
9364 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9365 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9366 return false;
9368 if (bitop2 != NOP_EXPR)
9370 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9371 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9372 return false;
9376 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9377 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9378 dts, ndts, slp_node, cost_vec);
9379 return true;
9382 /* Transform. */
9383 if (!slp_node)
9385 vec_oprnds0.create (1);
9386 vec_oprnds1.create (1);
9389 /* Handle def. */
9390 lhs = gimple_assign_lhs (stmt);
9391 mask = vect_create_destination_var (lhs, mask_type);
9393 /* Handle cmp expr. */
9394 for (j = 0; j < ncopies; j++)
9396 stmt_vec_info new_stmt_info = NULL;
9397 if (j == 0)
9399 if (slp_node)
9401 auto_vec<tree, 2> ops;
9402 auto_vec<vec<tree>, 2> vec_defs;
9404 ops.safe_push (rhs1);
9405 ops.safe_push (rhs2);
9406 vect_get_slp_defs (ops, slp_node, &vec_defs);
9407 vec_oprnds1 = vec_defs.pop ();
9408 vec_oprnds0 = vec_defs.pop ();
9409 if (swap_p)
9410 std::swap (vec_oprnds0, vec_oprnds1);
9412 else
9414 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
9415 vectype);
9416 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
9417 vectype);
9420 else
9422 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
9423 vec_oprnds0.pop ());
9424 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
9425 vec_oprnds1.pop ());
9428 if (!slp_node)
9430 if (swap_p)
9431 std::swap (vec_rhs1, vec_rhs2);
9432 vec_oprnds0.quick_push (vec_rhs1);
9433 vec_oprnds1.quick_push (vec_rhs2);
9436 /* Arguments are ready. Create the new vector stmt. */
9437 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9439 vec_rhs2 = vec_oprnds1[i];
9441 new_temp = make_ssa_name (mask);
9442 if (bitop1 == NOP_EXPR)
9444 gassign *new_stmt = gimple_build_assign (new_temp, code,
9445 vec_rhs1, vec_rhs2);
9446 new_stmt_info
9447 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9449 else
9451 gassign *new_stmt;
9452 if (bitop1 == BIT_NOT_EXPR)
9453 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9454 else
9455 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9456 vec_rhs2);
9457 new_stmt_info
9458 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9459 if (bitop2 != NOP_EXPR)
9461 tree res = make_ssa_name (mask);
9462 if (bitop2 == BIT_NOT_EXPR)
9463 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9464 else
9465 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9466 new_temp);
9467 new_stmt_info
9468 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9471 if (slp_node)
9472 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9475 if (slp_node)
9476 continue;
9478 if (j == 0)
9479 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9480 else
9481 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9483 prev_stmt_info = new_stmt_info;
9486 vec_oprnds0.release ();
9487 vec_oprnds1.release ();
9489 return true;
9492 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9493 can handle all live statements in the node. Otherwise return true
9494 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9495 GSI and VEC_STMT are as for vectorizable_live_operation. */
9497 static bool
9498 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9499 slp_tree slp_node, stmt_vec_info *vec_stmt,
9500 stmt_vector_for_cost *cost_vec)
9502 if (slp_node)
9504 stmt_vec_info slp_stmt_info;
9505 unsigned int i;
9506 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
9508 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9509 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
9510 vec_stmt, cost_vec))
9511 return false;
9514 else if (STMT_VINFO_LIVE_P (stmt_info)
9515 && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
9516 vec_stmt, cost_vec))
9517 return false;
9519 return true;
9522 /* Make sure the statement is vectorizable. */
9524 opt_result
9525 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
9526 slp_tree node, slp_instance node_instance,
9527 stmt_vector_for_cost *cost_vec)
9529 vec_info *vinfo = stmt_info->vinfo;
9530 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9531 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9532 bool ok;
9533 gimple_seq pattern_def_seq;
9535 if (dump_enabled_p ())
9536 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
9537 stmt_info->stmt);
9539 if (gimple_has_volatile_ops (stmt_info->stmt))
9540 return opt_result::failure_at (stmt_info->stmt,
9541 "not vectorized:"
9542 " stmt has volatile operands: %G\n",
9543 stmt_info->stmt);
9545 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9546 && node == NULL
9547 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9549 gimple_stmt_iterator si;
9551 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9553 stmt_vec_info pattern_def_stmt_info
9554 = vinfo->lookup_stmt (gsi_stmt (si));
9555 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9556 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9558 /* Analyze def stmt of STMT if it's a pattern stmt. */
9559 if (dump_enabled_p ())
9560 dump_printf_loc (MSG_NOTE, vect_location,
9561 "==> examining pattern def statement: %G",
9562 pattern_def_stmt_info->stmt);
9564 opt_result res
9565 = vect_analyze_stmt (pattern_def_stmt_info,
9566 need_to_vectorize, node, node_instance,
9567 cost_vec);
9568 if (!res)
9569 return res;
9574 /* Skip stmts that do not need to be vectorized. In loops this is expected
9575 to include:
9576 - the COND_EXPR which is the loop exit condition
9577 - any LABEL_EXPRs in the loop
9578 - computations that are used only for array indexing or loop control.
9579 In basic blocks we only analyze statements that are a part of some SLP
9580 instance, therefore, all the statements are relevant.
9582 Pattern statement needs to be analyzed instead of the original statement
9583 if the original statement is not relevant. Otherwise, we analyze both
9584 statements. In basic blocks we are called from some SLP instance
9585 traversal, don't analyze pattern stmts instead, the pattern stmts
9586 already will be part of SLP instance. */
9588 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9589 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9590 && !STMT_VINFO_LIVE_P (stmt_info))
9592 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9593 && pattern_stmt_info
9594 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9595 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9597 /* Analyze PATTERN_STMT instead of the original stmt. */
9598 stmt_info = pattern_stmt_info;
9599 if (dump_enabled_p ())
9600 dump_printf_loc (MSG_NOTE, vect_location,
9601 "==> examining pattern statement: %G",
9602 stmt_info->stmt);
9604 else
9606 if (dump_enabled_p ())
9607 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9609 return opt_result::success ();
9612 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9613 && node == NULL
9614 && pattern_stmt_info
9615 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9616 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9618 /* Analyze PATTERN_STMT too. */
9619 if (dump_enabled_p ())
9620 dump_printf_loc (MSG_NOTE, vect_location,
9621 "==> examining pattern statement: %G",
9622 pattern_stmt_info->stmt);
9624 opt_result res
9625 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
9626 node_instance, cost_vec);
9627 if (!res)
9628 return res;
9631 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9633 case vect_internal_def:
9634 break;
9636 case vect_reduction_def:
9637 case vect_nested_cycle:
9638 gcc_assert (!bb_vinfo
9639 && (relevance == vect_used_in_outer
9640 || relevance == vect_used_in_outer_by_reduction
9641 || relevance == vect_used_by_reduction
9642 || relevance == vect_unused_in_scope
9643 || relevance == vect_used_only_live));
9644 break;
9646 case vect_induction_def:
9647 gcc_assert (!bb_vinfo);
9648 break;
9650 case vect_constant_def:
9651 case vect_external_def:
9652 case vect_unknown_def_type:
9653 default:
9654 gcc_unreachable ();
9657 if (STMT_VINFO_RELEVANT_P (stmt_info))
9659 tree type = gimple_expr_type (stmt_info->stmt);
9660 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
9661 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9662 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9663 || (call && gimple_call_lhs (call) == NULL_TREE));
9664 *need_to_vectorize = true;
9667 if (PURE_SLP_STMT (stmt_info) && !node)
9669 if (dump_enabled_p ())
9670 dump_printf_loc (MSG_NOTE, vect_location,
9671 "handled only by SLP analysis\n");
9672 return opt_result::success ();
9675 ok = true;
9676 if (!bb_vinfo
9677 && (STMT_VINFO_RELEVANT_P (stmt_info)
9678 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9679 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
9680 -mveclibabi= takes preference over library functions with
9681 the simd attribute. */
9682 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9683 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9684 cost_vec)
9685 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
9686 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9687 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
9688 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9689 cost_vec)
9690 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9691 || vectorizable_reduction (stmt_info, NULL, NULL, node,
9692 node_instance, cost_vec)
9693 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
9694 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9695 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9696 cost_vec)
9697 || vectorizable_comparison (stmt_info, NULL, NULL, node,
9698 cost_vec));
9699 else
9701 if (bb_vinfo)
9702 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9703 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9704 cost_vec)
9705 || vectorizable_conversion (stmt_info, NULL, NULL, node,
9706 cost_vec)
9707 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9708 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9709 || vectorizable_assignment (stmt_info, NULL, NULL, node,
9710 cost_vec)
9711 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9712 cost_vec)
9713 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9714 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9715 cost_vec)
9716 || vectorizable_comparison (stmt_info, NULL, NULL, node,
9717 cost_vec));
9720 if (!ok)
9721 return opt_result::failure_at (stmt_info->stmt,
9722 "not vectorized:"
9723 " relevant stmt not supported: %G",
9724 stmt_info->stmt);
9726 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9727 need extra handling, except for vectorizable reductions. */
9728 if (!bb_vinfo
9729 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9730 && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
9731 return opt_result::failure_at (stmt_info->stmt,
9732 "not vectorized:"
9733 " live stmt not supported: %G",
9734 stmt_info->stmt);
9736 return opt_result::success ();
9740 /* Function vect_transform_stmt.
9742 Create a vectorized stmt to replace STMT_INFO, and insert it at BSI. */
9744 bool
9745 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9746 slp_tree slp_node, slp_instance slp_node_instance)
9748 vec_info *vinfo = stmt_info->vinfo;
9749 bool is_store = false;
9750 stmt_vec_info vec_stmt = NULL;
9751 bool done;
9753 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9754 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
9756 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9757 && nested_in_vect_loop_p
9758 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9759 stmt_info));
9761 gimple *stmt = stmt_info->stmt;
9762 switch (STMT_VINFO_TYPE (stmt_info))
9764 case type_demotion_vec_info_type:
9765 case type_promotion_vec_info_type:
9766 case type_conversion_vec_info_type:
9767 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
9768 NULL);
9769 gcc_assert (done);
9770 break;
9772 case induc_vec_info_type:
9773 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
9774 NULL);
9775 gcc_assert (done);
9776 break;
9778 case shift_vec_info_type:
9779 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9780 gcc_assert (done);
9781 break;
9783 case op_vec_info_type:
9784 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
9785 NULL);
9786 gcc_assert (done);
9787 break;
9789 case assignment_vec_info_type:
9790 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
9791 NULL);
9792 gcc_assert (done);
9793 break;
9795 case load_vec_info_type:
9796 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
9797 slp_node_instance, NULL);
9798 gcc_assert (done);
9799 break;
9801 case store_vec_info_type:
9802 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9803 gcc_assert (done);
9804 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9806 /* In case of interleaving, the whole chain is vectorized when the
9807 last store in the chain is reached. Store stmts before the last
9808 one are skipped, and there vec_stmt_info shouldn't be freed
9809 meanwhile. */
9810 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9811 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9812 is_store = true;
9814 else
9815 is_store = true;
9816 break;
9818 case condition_vec_info_type:
9819 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
9820 slp_node, NULL);
9821 gcc_assert (done);
9822 break;
9824 case comparison_vec_info_type:
9825 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
9826 slp_node, NULL);
9827 gcc_assert (done);
9828 break;
9830 case call_vec_info_type:
9831 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9832 stmt = gsi_stmt (*gsi);
9833 break;
9835 case call_simd_clone_vec_info_type:
9836 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
9837 slp_node, NULL);
9838 stmt = gsi_stmt (*gsi);
9839 break;
9841 case reduc_vec_info_type:
9842 done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
9843 slp_node_instance, NULL);
9844 gcc_assert (done);
9845 break;
9847 default:
9848 if (!STMT_VINFO_LIVE_P (stmt_info))
9850 if (dump_enabled_p ())
9851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9852 "stmt not supported.\n");
9853 gcc_unreachable ();
9857 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9858 This would break hybrid SLP vectorization. */
9859 if (slp_node)
9860 gcc_assert (!vec_stmt
9861 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
9863 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9864 is being vectorized, but outside the immediately enclosing loop. */
9865 if (vec_stmt
9866 && nested_p
9867 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9868 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9869 || STMT_VINFO_RELEVANT (stmt_info) ==
9870 vect_used_in_outer_by_reduction))
9872 struct loop *innerloop = LOOP_VINFO_LOOP (
9873 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9874 imm_use_iterator imm_iter;
9875 use_operand_p use_p;
9876 tree scalar_dest;
9878 if (dump_enabled_p ())
9879 dump_printf_loc (MSG_NOTE, vect_location,
9880 "Record the vdef for outer-loop vectorization.\n");
9882 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
9883 (to be used when vectorizing outer-loop stmts that use the DEF of
9884 STMT). */
9885 if (gimple_code (stmt) == GIMPLE_PHI)
9886 scalar_dest = PHI_RESULT (stmt);
9887 else
9888 scalar_dest = gimple_get_lhs (stmt);
9890 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9891 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9893 stmt_vec_info exit_phi_info
9894 = vinfo->lookup_stmt (USE_STMT (use_p));
9895 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9899 /* Handle stmts whose DEF is used outside the loop-nest that is
9900 being vectorized. */
9901 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9903 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
9904 NULL);
9905 gcc_assert (done);
9908 if (vec_stmt)
9909 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9911 return is_store;
9915 /* Remove a group of stores (for SLP or interleaving), free their
9916 stmt_vec_info. */
9918 void
9919 vect_remove_stores (stmt_vec_info first_stmt_info)
9921 vec_info *vinfo = first_stmt_info->vinfo;
9922 stmt_vec_info next_stmt_info = first_stmt_info;
9924 while (next_stmt_info)
9926 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9927 next_stmt_info = vect_orig_stmt (next_stmt_info);
9928 /* Free the attached stmt_vec_info and remove the stmt. */
9929 vinfo->remove_stmt (next_stmt_info);
9930 next_stmt_info = tmp;
9934 /* Function get_vectype_for_scalar_type_and_size.
9936 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9937 by the target. */
9939 tree
9940 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9942 tree orig_scalar_type = scalar_type;
9943 scalar_mode inner_mode;
9944 machine_mode simd_mode;
9945 poly_uint64 nunits;
9946 tree vectype;
9948 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9949 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9950 return NULL_TREE;
9952 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9954 /* For vector types of elements whose mode precision doesn't
9955 match their types precision we use a element type of mode
9956 precision. The vectorization routines will have to make sure
9957 they support the proper result truncation/extension.
9958 We also make sure to build vector types with INTEGER_TYPE
9959 component type only. */
9960 if (INTEGRAL_TYPE_P (scalar_type)
9961 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9962 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9963 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9964 TYPE_UNSIGNED (scalar_type));
9966 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9967 When the component mode passes the above test simply use a type
9968 corresponding to that mode. The theory is that any use that
9969 would cause problems with this will disable vectorization anyway. */
9970 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9971 && !INTEGRAL_TYPE_P (scalar_type))
9972 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9974 /* We can't build a vector type of elements with alignment bigger than
9975 their size. */
9976 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9977 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9978 TYPE_UNSIGNED (scalar_type));
9980 /* If we felt back to using the mode fail if there was
9981 no scalar type for it. */
9982 if (scalar_type == NULL_TREE)
9983 return NULL_TREE;
9985 /* If no size was supplied use the mode the target prefers. Otherwise
9986 lookup a vector mode of the specified size. */
9987 if (known_eq (size, 0U))
9988 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9989 else if (!multiple_p (size, nbytes, &nunits)
9990 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9991 return NULL_TREE;
9992 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9993 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9994 return NULL_TREE;
9996 vectype = build_vector_type (scalar_type, nunits);
9998 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9999 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
10000 return NULL_TREE;
10002 /* Re-attach the address-space qualifier if we canonicalized the scalar
10003 type. */
10004 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10005 return build_qualified_type
10006 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10008 return vectype;
10011 poly_uint64 current_vector_size;
10013 /* Function get_vectype_for_scalar_type.
10015 Returns the vector type corresponding to SCALAR_TYPE as supported
10016 by the target. */
10018 tree
10019 get_vectype_for_scalar_type (tree scalar_type)
10021 tree vectype;
10022 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10023 current_vector_size);
10024 if (vectype
10025 && known_eq (current_vector_size, 0U))
10026 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10027 return vectype;
10030 /* Function get_mask_type_for_scalar_type.
10032 Returns the mask type corresponding to a result of comparison
10033 of vectors of specified SCALAR_TYPE as supported by target. */
10035 tree
10036 get_mask_type_for_scalar_type (tree scalar_type)
10038 tree vectype = get_vectype_for_scalar_type (scalar_type);
10040 if (!vectype)
10041 return NULL;
10043 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10044 current_vector_size);
10047 /* Function get_same_sized_vectype
10049 Returns a vector type corresponding to SCALAR_TYPE of size
10050 VECTOR_TYPE if supported by the target. */
10052 tree
10053 get_same_sized_vectype (tree scalar_type, tree vector_type)
10055 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10056 return build_same_sized_truth_vector_type (vector_type);
10058 return get_vectype_for_scalar_type_and_size
10059 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
10062 /* Function vect_is_simple_use.
10064 Input:
10065 VINFO - the vect info of the loop or basic block that is being vectorized.
10066 OPERAND - operand in the loop or bb.
10067 Output:
10068 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
10069 case OPERAND is an SSA_NAME that is defined in the vectorizable region
10070 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
10071 the definition could be anywhere in the function
10072 DT - the type of definition
10074 Returns whether a stmt with OPERAND can be vectorized.
10075 For loops, supportable operands are constants, loop invariants, and operands
10076 that are defined by the current iteration of the loop. Unsupportable
10077 operands are those that are defined by a previous iteration of the loop (as
10078 is the case in reduction/induction computations).
10079 For basic blocks, supportable operands are constants and bb invariants.
10080 For now, operands defined outside the basic block are not supported. */
10082 bool
10083 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10084 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
10086 if (def_stmt_info_out)
10087 *def_stmt_info_out = NULL;
10088 if (def_stmt_out)
10089 *def_stmt_out = NULL;
10090 *dt = vect_unknown_def_type;
10092 if (dump_enabled_p ())
10094 dump_printf_loc (MSG_NOTE, vect_location,
10095 "vect_is_simple_use: operand ");
10096 if (TREE_CODE (operand) == SSA_NAME
10097 && !SSA_NAME_IS_DEFAULT_DEF (operand))
10098 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10099 else
10100 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
10103 if (CONSTANT_CLASS_P (operand))
10104 *dt = vect_constant_def;
10105 else if (is_gimple_min_invariant (operand))
10106 *dt = vect_external_def;
10107 else if (TREE_CODE (operand) != SSA_NAME)
10108 *dt = vect_unknown_def_type;
10109 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
10110 *dt = vect_external_def;
10111 else
10113 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10114 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
10115 if (!stmt_vinfo)
10116 *dt = vect_external_def;
10117 else
10119 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
10120 def_stmt = stmt_vinfo->stmt;
10121 switch (gimple_code (def_stmt))
10123 case GIMPLE_PHI:
10124 case GIMPLE_ASSIGN:
10125 case GIMPLE_CALL:
10126 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10127 break;
10128 default:
10129 *dt = vect_unknown_def_type;
10130 break;
10132 if (def_stmt_info_out)
10133 *def_stmt_info_out = stmt_vinfo;
10135 if (def_stmt_out)
10136 *def_stmt_out = def_stmt;
10139 if (dump_enabled_p ())
10141 dump_printf (MSG_NOTE, ", type of def: ");
10142 switch (*dt)
10144 case vect_uninitialized_def:
10145 dump_printf (MSG_NOTE, "uninitialized\n");
10146 break;
10147 case vect_constant_def:
10148 dump_printf (MSG_NOTE, "constant\n");
10149 break;
10150 case vect_external_def:
10151 dump_printf (MSG_NOTE, "external\n");
10152 break;
10153 case vect_internal_def:
10154 dump_printf (MSG_NOTE, "internal\n");
10155 break;
10156 case vect_induction_def:
10157 dump_printf (MSG_NOTE, "induction\n");
10158 break;
10159 case vect_reduction_def:
10160 dump_printf (MSG_NOTE, "reduction\n");
10161 break;
10162 case vect_double_reduction_def:
10163 dump_printf (MSG_NOTE, "double reduction\n");
10164 break;
10165 case vect_nested_cycle:
10166 dump_printf (MSG_NOTE, "nested cycle\n");
10167 break;
10168 case vect_unknown_def_type:
10169 dump_printf (MSG_NOTE, "unknown\n");
10170 break;
10174 if (*dt == vect_unknown_def_type)
10176 if (dump_enabled_p ())
10177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10178 "Unsupported pattern.\n");
10179 return false;
10182 return true;
10185 /* Function vect_is_simple_use.
10187 Same as vect_is_simple_use but also determines the vector operand
10188 type of OPERAND and stores it to *VECTYPE. If the definition of
10189 OPERAND is vect_uninitialized_def, vect_constant_def or
10190 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10191 is responsible to compute the best suited vector type for the
10192 scalar operand. */
10194 bool
10195 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10196 tree *vectype, stmt_vec_info *def_stmt_info_out,
10197 gimple **def_stmt_out)
10199 stmt_vec_info def_stmt_info;
10200 gimple *def_stmt;
10201 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10202 return false;
10204 if (def_stmt_out)
10205 *def_stmt_out = def_stmt;
10206 if (def_stmt_info_out)
10207 *def_stmt_info_out = def_stmt_info;
10209 /* Now get a vector type if the def is internal, otherwise supply
10210 NULL_TREE and leave it up to the caller to figure out a proper
10211 type for the use stmt. */
10212 if (*dt == vect_internal_def
10213 || *dt == vect_induction_def
10214 || *dt == vect_reduction_def
10215 || *dt == vect_double_reduction_def
10216 || *dt == vect_nested_cycle)
10218 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
10219 gcc_assert (*vectype != NULL_TREE);
10220 if (dump_enabled_p ())
10221 dump_printf_loc (MSG_NOTE, vect_location,
10222 "vect_is_simple_use: vectype %T\n", *vectype);
10224 else if (*dt == vect_uninitialized_def
10225 || *dt == vect_constant_def
10226 || *dt == vect_external_def)
10227 *vectype = NULL_TREE;
10228 else
10229 gcc_unreachable ();
10231 return true;
10235 /* Function supportable_widening_operation
10237 Check whether an operation represented by the code CODE is a
10238 widening operation that is supported by the target platform in
10239 vector form (i.e., when operating on arguments of type VECTYPE_IN
10240 producing a result of type VECTYPE_OUT).
10242 Widening operations we currently support are NOP (CONVERT), FLOAT,
10243 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10244 are supported by the target platform either directly (via vector
10245 tree-codes), or via target builtins.
10247 Output:
10248 - CODE1 and CODE2 are codes of vector operations to be used when
10249 vectorizing the operation, if available.
10250 - MULTI_STEP_CVT determines the number of required intermediate steps in
10251 case of multi-step conversion (like char->short->int - in that case
10252 MULTI_STEP_CVT will be 1).
10253 - INTERM_TYPES contains the intermediate type required to perform the
10254 widening operation (short in the above example). */
10256 bool
10257 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
10258 tree vectype_out, tree vectype_in,
10259 enum tree_code *code1, enum tree_code *code2,
10260 int *multi_step_cvt,
10261 vec<tree> *interm_types)
10263 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10264 struct loop *vect_loop = NULL;
10265 machine_mode vec_mode;
10266 enum insn_code icode1, icode2;
10267 optab optab1, optab2;
10268 tree vectype = vectype_in;
10269 tree wide_vectype = vectype_out;
10270 enum tree_code c1, c2;
10271 int i;
10272 tree prev_type, intermediate_type;
10273 machine_mode intermediate_mode, prev_mode;
10274 optab optab3, optab4;
10276 *multi_step_cvt = 0;
10277 if (loop_info)
10278 vect_loop = LOOP_VINFO_LOOP (loop_info);
10280 switch (code)
10282 case WIDEN_MULT_EXPR:
10283 /* The result of a vectorized widening operation usually requires
10284 two vectors (because the widened results do not fit into one vector).
10285 The generated vector results would normally be expected to be
10286 generated in the same order as in the original scalar computation,
10287 i.e. if 8 results are generated in each vector iteration, they are
10288 to be organized as follows:
10289 vect1: [res1,res2,res3,res4],
10290 vect2: [res5,res6,res7,res8].
10292 However, in the special case that the result of the widening
10293 operation is used in a reduction computation only, the order doesn't
10294 matter (because when vectorizing a reduction we change the order of
10295 the computation). Some targets can take advantage of this and
10296 generate more efficient code. For example, targets like Altivec,
10297 that support widen_mult using a sequence of {mult_even,mult_odd}
10298 generate the following vectors:
10299 vect1: [res1,res3,res5,res7],
10300 vect2: [res2,res4,res6,res8].
10302 When vectorizing outer-loops, we execute the inner-loop sequentially
10303 (each vectorized inner-loop iteration contributes to VF outer-loop
10304 iterations in parallel). We therefore don't allow to change the
10305 order of the computation in the inner-loop during outer-loop
10306 vectorization. */
10307 /* TODO: Another case in which order doesn't *really* matter is when we
10308 widen and then contract again, e.g. (short)((int)x * y >> 8).
10309 Normally, pack_trunc performs an even/odd permute, whereas the
10310 repack from an even/odd expansion would be an interleave, which
10311 would be significantly simpler for e.g. AVX2. */
10312 /* In any case, in order to avoid duplicating the code below, recurse
10313 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10314 are properly set up for the caller. If we fail, we'll continue with
10315 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10316 if (vect_loop
10317 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10318 && !nested_in_vect_loop_p (vect_loop, stmt_info)
10319 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10320 stmt_info, vectype_out,
10321 vectype_in, code1, code2,
10322 multi_step_cvt, interm_types))
10324 /* Elements in a vector with vect_used_by_reduction property cannot
10325 be reordered if the use chain with this property does not have the
10326 same operation. One such an example is s += a * b, where elements
10327 in a and b cannot be reordered. Here we check if the vector defined
10328 by STMT is only directly used in the reduction statement. */
10329 tree lhs = gimple_assign_lhs (stmt_info->stmt);
10330 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10331 if (use_stmt_info
10332 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10333 return true;
10335 c1 = VEC_WIDEN_MULT_LO_EXPR;
10336 c2 = VEC_WIDEN_MULT_HI_EXPR;
10337 break;
10339 case DOT_PROD_EXPR:
10340 c1 = DOT_PROD_EXPR;
10341 c2 = DOT_PROD_EXPR;
10342 break;
10344 case SAD_EXPR:
10345 c1 = SAD_EXPR;
10346 c2 = SAD_EXPR;
10347 break;
10349 case VEC_WIDEN_MULT_EVEN_EXPR:
10350 /* Support the recursion induced just above. */
10351 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10352 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10353 break;
10355 case WIDEN_LSHIFT_EXPR:
10356 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10357 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10358 break;
10360 CASE_CONVERT:
10361 c1 = VEC_UNPACK_LO_EXPR;
10362 c2 = VEC_UNPACK_HI_EXPR;
10363 break;
10365 case FLOAT_EXPR:
10366 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10367 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10368 break;
10370 case FIX_TRUNC_EXPR:
10371 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10372 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10373 break;
10375 default:
10376 gcc_unreachable ();
10379 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10380 std::swap (c1, c2);
10382 if (code == FIX_TRUNC_EXPR)
10384 /* The signedness is determined from output operand. */
10385 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10386 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10388 else if (CONVERT_EXPR_CODE_P (code)
10389 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
10390 && VECTOR_BOOLEAN_TYPE_P (vectype)
10391 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
10392 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
10394 /* If the input and result modes are the same, a different optab
10395 is needed where we pass in the number of units in vectype. */
10396 optab1 = vec_unpacks_sbool_lo_optab;
10397 optab2 = vec_unpacks_sbool_hi_optab;
10399 else
10401 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10402 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10405 if (!optab1 || !optab2)
10406 return false;
10408 vec_mode = TYPE_MODE (vectype);
10409 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10410 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10411 return false;
10413 *code1 = c1;
10414 *code2 = c2;
10416 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10417 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10419 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10420 return true;
10421 /* For scalar masks we may have different boolean
10422 vector types having the same QImode. Thus we
10423 add additional check for elements number. */
10424 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10425 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
10426 return true;
10429 /* Check if it's a multi-step conversion that can be done using intermediate
10430 types. */
10432 prev_type = vectype;
10433 prev_mode = vec_mode;
10435 if (!CONVERT_EXPR_CODE_P (code))
10436 return false;
10438 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10439 intermediate steps in promotion sequence. We try
10440 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10441 not. */
10442 interm_types->create (MAX_INTERM_CVT_STEPS);
10443 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10445 intermediate_mode = insn_data[icode1].operand[0].mode;
10446 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10448 intermediate_type = vect_halve_mask_nunits (prev_type);
10449 if (intermediate_mode != TYPE_MODE (intermediate_type))
10450 return false;
10452 else
10453 intermediate_type
10454 = lang_hooks.types.type_for_mode (intermediate_mode,
10455 TYPE_UNSIGNED (prev_type));
10457 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
10458 && VECTOR_BOOLEAN_TYPE_P (prev_type)
10459 && intermediate_mode == prev_mode
10460 && SCALAR_INT_MODE_P (prev_mode))
10462 /* If the input and result modes are the same, a different optab
10463 is needed where we pass in the number of units in vectype. */
10464 optab3 = vec_unpacks_sbool_lo_optab;
10465 optab4 = vec_unpacks_sbool_hi_optab;
10467 else
10469 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10470 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10473 if (!optab3 || !optab4
10474 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10475 || insn_data[icode1].operand[0].mode != intermediate_mode
10476 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10477 || insn_data[icode2].operand[0].mode != intermediate_mode
10478 || ((icode1 = optab_handler (optab3, intermediate_mode))
10479 == CODE_FOR_nothing)
10480 || ((icode2 = optab_handler (optab4, intermediate_mode))
10481 == CODE_FOR_nothing))
10482 break;
10484 interm_types->quick_push (intermediate_type);
10485 (*multi_step_cvt)++;
10487 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10488 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10490 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10491 return true;
10492 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10493 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
10494 return true;
10497 prev_type = intermediate_type;
10498 prev_mode = intermediate_mode;
10501 interm_types->release ();
10502 return false;
10506 /* Function supportable_narrowing_operation
10508 Check whether an operation represented by the code CODE is a
10509 narrowing operation that is supported by the target platform in
10510 vector form (i.e., when operating on arguments of type VECTYPE_IN
10511 and producing a result of type VECTYPE_OUT).
10513 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10514 and FLOAT. This function checks if these operations are supported by
10515 the target platform directly via vector tree-codes.
10517 Output:
10518 - CODE1 is the code of a vector operation to be used when
10519 vectorizing the operation, if available.
10520 - MULTI_STEP_CVT determines the number of required intermediate steps in
10521 case of multi-step conversion (like int->short->char - in that case
10522 MULTI_STEP_CVT will be 1).
10523 - INTERM_TYPES contains the intermediate type required to perform the
10524 narrowing operation (short in the above example). */
10526 bool
10527 supportable_narrowing_operation (enum tree_code code,
10528 tree vectype_out, tree vectype_in,
10529 enum tree_code *code1, int *multi_step_cvt,
10530 vec<tree> *interm_types)
10532 machine_mode vec_mode;
10533 enum insn_code icode1;
10534 optab optab1, interm_optab;
10535 tree vectype = vectype_in;
10536 tree narrow_vectype = vectype_out;
10537 enum tree_code c1;
10538 tree intermediate_type, prev_type;
10539 machine_mode intermediate_mode, prev_mode;
10540 int i;
10541 bool uns;
10543 *multi_step_cvt = 0;
10544 switch (code)
10546 CASE_CONVERT:
10547 c1 = VEC_PACK_TRUNC_EXPR;
10548 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
10549 && VECTOR_BOOLEAN_TYPE_P (vectype)
10550 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
10551 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
10552 optab1 = vec_pack_sbool_trunc_optab;
10553 else
10554 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10555 break;
10557 case FIX_TRUNC_EXPR:
10558 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10559 /* The signedness is determined from output operand. */
10560 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10561 break;
10563 case FLOAT_EXPR:
10564 c1 = VEC_PACK_FLOAT_EXPR;
10565 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10566 break;
10568 default:
10569 gcc_unreachable ();
10572 if (!optab1)
10573 return false;
10575 vec_mode = TYPE_MODE (vectype);
10576 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10577 return false;
10579 *code1 = c1;
10581 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10583 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10584 return true;
10585 /* For scalar masks we may have different boolean
10586 vector types having the same QImode. Thus we
10587 add additional check for elements number. */
10588 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10589 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
10590 return true;
10593 if (code == FLOAT_EXPR)
10594 return false;
10596 /* Check if it's a multi-step conversion that can be done using intermediate
10597 types. */
10598 prev_mode = vec_mode;
10599 prev_type = vectype;
10600 if (code == FIX_TRUNC_EXPR)
10601 uns = TYPE_UNSIGNED (vectype_out);
10602 else
10603 uns = TYPE_UNSIGNED (vectype);
10605 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10606 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10607 costly than signed. */
10608 if (code == FIX_TRUNC_EXPR && uns)
10610 enum insn_code icode2;
10612 intermediate_type
10613 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10614 interm_optab
10615 = optab_for_tree_code (c1, intermediate_type, optab_default);
10616 if (interm_optab != unknown_optab
10617 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10618 && insn_data[icode1].operand[0].mode
10619 == insn_data[icode2].operand[0].mode)
10621 uns = false;
10622 optab1 = interm_optab;
10623 icode1 = icode2;
10627 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10628 intermediate steps in promotion sequence. We try
10629 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10630 interm_types->create (MAX_INTERM_CVT_STEPS);
10631 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10633 intermediate_mode = insn_data[icode1].operand[0].mode;
10634 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10636 intermediate_type = vect_double_mask_nunits (prev_type);
10637 if (intermediate_mode != TYPE_MODE (intermediate_type))
10638 return false;
10640 else
10641 intermediate_type
10642 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10643 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
10644 && VECTOR_BOOLEAN_TYPE_P (prev_type)
10645 && intermediate_mode == prev_mode
10646 && SCALAR_INT_MODE_P (prev_mode))
10647 interm_optab = vec_pack_sbool_trunc_optab;
10648 else
10649 interm_optab
10650 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10651 optab_default);
10652 if (!interm_optab
10653 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10654 || insn_data[icode1].operand[0].mode != intermediate_mode
10655 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10656 == CODE_FOR_nothing))
10657 break;
10659 interm_types->quick_push (intermediate_type);
10660 (*multi_step_cvt)++;
10662 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10664 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10665 return true;
10666 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10667 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
10668 return true;
10671 prev_mode = intermediate_mode;
10672 prev_type = intermediate_type;
10673 optab1 = interm_optab;
10676 interm_types->release ();
10677 return false;
10680 /* Generate and return a statement that sets vector mask MASK such that
10681 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10683 gcall *
10684 vect_gen_while (tree mask, tree start_index, tree end_index)
10686 tree cmp_type = TREE_TYPE (start_index);
10687 tree mask_type = TREE_TYPE (mask);
10688 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10689 cmp_type, mask_type,
10690 OPTIMIZE_FOR_SPEED));
10691 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10692 start_index, end_index,
10693 build_zero_cst (mask_type));
10694 gimple_call_set_lhs (call, mask);
10695 return call;
10698 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10699 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10701 tree
10702 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10703 tree end_index)
10705 tree tmp = make_ssa_name (mask_type);
10706 gcall *call = vect_gen_while (tmp, start_index, end_index);
10707 gimple_seq_add_stmt (seq, call);
10708 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10711 /* Try to compute the vector types required to vectorize STMT_INFO,
10712 returning true on success and false if vectorization isn't possible.
10714 On success:
10716 - Set *STMT_VECTYPE_OUT to:
10717 - NULL_TREE if the statement doesn't need to be vectorized;
10718 - boolean_type_node if the statement is a boolean operation whose
10719 vector type can only be determined once all the other vector types
10720 are known; and
10721 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10723 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10724 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10725 statement does not help to determine the overall number of units. */
10727 opt_result
10728 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10729 tree *stmt_vectype_out,
10730 tree *nunits_vectype_out)
10732 gimple *stmt = stmt_info->stmt;
10734 *stmt_vectype_out = NULL_TREE;
10735 *nunits_vectype_out = NULL_TREE;
10737 if (gimple_get_lhs (stmt) == NULL_TREE
10738 /* MASK_STORE has no lhs, but is ok. */
10739 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10741 if (is_a <gcall *> (stmt))
10743 /* Ignore calls with no lhs. These must be calls to
10744 #pragma omp simd functions, and what vectorization factor
10745 it really needs can't be determined until
10746 vectorizable_simd_clone_call. */
10747 if (dump_enabled_p ())
10748 dump_printf_loc (MSG_NOTE, vect_location,
10749 "defer to SIMD clone analysis.\n");
10750 return opt_result::success ();
10753 return opt_result::failure_at (stmt,
10754 "not vectorized: irregular stmt.%G", stmt);
10757 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10758 return opt_result::failure_at (stmt,
10759 "not vectorized: vector stmt in loop:%G",
10760 stmt);
10762 tree vectype;
10763 tree scalar_type = NULL_TREE;
10764 if (STMT_VINFO_VECTYPE (stmt_info))
10765 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10766 else
10768 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10769 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10770 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10771 else
10772 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10774 /* Pure bool ops don't participate in number-of-units computation.
10775 For comparisons use the types being compared. */
10776 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10777 && is_gimple_assign (stmt)
10778 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10780 *stmt_vectype_out = boolean_type_node;
10782 tree rhs1 = gimple_assign_rhs1 (stmt);
10783 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10784 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10785 scalar_type = TREE_TYPE (rhs1);
10786 else
10788 if (dump_enabled_p ())
10789 dump_printf_loc (MSG_NOTE, vect_location,
10790 "pure bool operation.\n");
10791 return opt_result::success ();
10795 if (dump_enabled_p ())
10796 dump_printf_loc (MSG_NOTE, vect_location,
10797 "get vectype for scalar type: %T\n", scalar_type);
10798 vectype = get_vectype_for_scalar_type (scalar_type);
10799 if (!vectype)
10800 return opt_result::failure_at (stmt,
10801 "not vectorized:"
10802 " unsupported data-type %T\n",
10803 scalar_type);
10805 if (!*stmt_vectype_out)
10806 *stmt_vectype_out = vectype;
10808 if (dump_enabled_p ())
10809 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
10812 /* Don't try to compute scalar types if the stmt produces a boolean
10813 vector; use the existing vector type instead. */
10814 tree nunits_vectype;
10815 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10816 nunits_vectype = vectype;
10817 else
10819 /* The number of units is set according to the smallest scalar
10820 type (or the largest vector size, but we only support one
10821 vector size per vectorization). */
10822 if (*stmt_vectype_out != boolean_type_node)
10824 HOST_WIDE_INT dummy;
10825 scalar_type = vect_get_smallest_scalar_type (stmt_info,
10826 &dummy, &dummy);
10828 if (dump_enabled_p ())
10829 dump_printf_loc (MSG_NOTE, vect_location,
10830 "get vectype for scalar type: %T\n", scalar_type);
10831 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10833 if (!nunits_vectype)
10834 return opt_result::failure_at (stmt,
10835 "not vectorized: unsupported data-type %T\n",
10836 scalar_type);
10838 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10839 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10840 return opt_result::failure_at (stmt,
10841 "not vectorized: different sized vector "
10842 "types in statement, %T and %T\n",
10843 vectype, nunits_vectype);
10845 if (dump_enabled_p ())
10847 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
10848 nunits_vectype);
10850 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10851 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10852 dump_printf (MSG_NOTE, "\n");
10855 *nunits_vectype_out = nunits_vectype;
10856 return opt_result::success ();
10859 /* Try to determine the correct vector type for STMT_INFO, which is a
10860 statement that produces a scalar boolean result. Return the vector
10861 type on success, otherwise return NULL_TREE. */
10863 opt_tree
10864 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10866 gimple *stmt = stmt_info->stmt;
10867 tree mask_type = NULL;
10868 tree vectype, scalar_type;
10870 if (is_gimple_assign (stmt)
10871 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10872 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10874 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10875 mask_type = get_mask_type_for_scalar_type (scalar_type);
10877 if (!mask_type)
10878 return opt_tree::failure_at (stmt,
10879 "not vectorized: unsupported mask\n");
10881 else
10883 tree rhs;
10884 ssa_op_iter iter;
10885 enum vect_def_type dt;
10887 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10889 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10890 return opt_tree::failure_at (stmt,
10891 "not vectorized:can't compute mask"
10892 " type for statement, %G", stmt);
10894 /* No vectype probably means external definition.
10895 Allow it in case there is another operand which
10896 allows to determine mask type. */
10897 if (!vectype)
10898 continue;
10900 if (!mask_type)
10901 mask_type = vectype;
10902 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10903 TYPE_VECTOR_SUBPARTS (vectype)))
10904 return opt_tree::failure_at (stmt,
10905 "not vectorized: different sized mask"
10906 " types in statement, %T and %T\n",
10907 mask_type, vectype);
10908 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10909 != VECTOR_BOOLEAN_TYPE_P (vectype))
10910 return opt_tree::failure_at (stmt,
10911 "not vectorized: mixed mask and "
10912 "nonmask vector types in statement, "
10913 "%T and %T\n",
10914 mask_type, vectype);
10917 /* We may compare boolean value loaded as vector of integers.
10918 Fix mask_type in such case. */
10919 if (mask_type
10920 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10921 && gimple_code (stmt) == GIMPLE_ASSIGN
10922 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10923 mask_type = build_same_sized_truth_vector_type (mask_type);
10926 /* No mask_type should mean loop invariant predicate.
10927 This is probably a subject for optimization in if-conversion. */
10928 if (!mask_type)
10929 return opt_tree::failure_at (stmt,
10930 "not vectorized: can't compute mask type "
10931 "for statement: %G", stmt);
10933 return opt_tree::success (mask_type);