/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2019 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
58 /* For lang_hooks.types.type_for_mode. */
59 #include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
      (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

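/* As a rough illustration only (the call below is hypothetical, not taken
   from this file): costing two unaligned vector loads of a statement with
   STMT_VINFO_GATHER_SCATTER_P set, e.g.
     record_stmt_cost (&cost_vec, 2, unaligned_load, stmt_info, 0, vect_body);
   records the work as vector_gather_load and returns twice the target's
   builtin_vectorization_cost for that kind.  */
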
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                       tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
                    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d: %G", relevant, live_p,
                     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}

/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
                                  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
        && !gimple_clobber_p (stmt_info->stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}

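/* For illustration: a store such as a[i] = x inside the loop has a vdef and
   is therefore marked vect_used_in_scope above, whereas a computation whose
   only uses are in the loop-exit PHIs merely sets *LIVE_P (and is promoted
   to vect_used_only_live unless it is simple with all-invariant uses).  */
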
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
        {
          internal_fn ifn = gimple_call_internal_fn (call);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (call, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (call, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (call, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

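/* For example, given the copy statement a[i_1] = x_2, the use x_2 is the
   copied rhs and the function returns true, while the use i_1 only feeds
   the array index of the lhs and the function returns false.  */
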
/*
   Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
             bool force)
{
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
                                   "not vectorized:"
                                   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  def_bb = gimple_bb (dstmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     DSTMT_VINFO must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DSTMT_VINFO in the loop.  So we just
     check that everything is as expected, and we are done.  */
  bb = gimple_bb (stmt_vinfo->stmt);
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = dstmt_vinfo
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = dstmt_vinfo
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
                                      loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
                             phi_info->stmt);

          if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi_info, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "init: stmt relevant? %G", stmt_info->stmt);

          if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of reduction.\n");
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (assign);
              tree op = gimple_assign_rhs1 (assign);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  opt_result res
                    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
                                   loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
                                     loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  i = 2;
                }
              for (; i < gimple_num_ops (assign); i++)
                {
                  op = gimple_op (assign, i);
                  if (TREE_CODE (op) == SSA_NAME)
                    {
                      opt_result res
                        = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                                       &worklist, false);
                      if (!res)
                        return res;
                    }
                }
            }
          else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
            {
              for (i = 0; i < gimple_call_num_args (call); i++)
                {
                  tree arg = gimple_call_arg (call, i);
                  opt_result res
                    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
                                   &worklist, false);
                  if (!res)
                    return res;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            opt_result res
              = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                             &worklist, false);
            if (!res)
              return res;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
            gcc_unreachable ();
          opt_result res
            = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
                           &worklist, true);
          if (!res)
            return res;
        }
    } /* while worklist */

  return opt_result::success ();
}

/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
                               unsigned opno, enum vect_def_type dt,
                               stmt_vector_for_cost *cost_vec)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
         are composed of repeated whole groups we only need to
         cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
        elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
      /* ???  We're just tracking whether all operands of a single
         vector initializer are the same, ideally we'd check if
         we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
                                 opno))
        elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
        {
          /* ???  We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          prologue_cost += record_stmt_cost
              (cost_vec, 1,
               dt == vect_external_def
               ? (elt ? scalar_to_vec : vec_construct)
               : vector_load,
               stmt_info, 0, vect_prologue);
          nelt = 0;
        }
    }

  return prologue_cost;
}

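/* As a rough example: if every scalar stmt of the SLP node uses the same
   external scalar, the operand is costed as a single scalar_to_vec splat;
   differing external operands are costed as vec_construct, and constant
   operands as a vector_load from the constant pool.  */
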
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ???  Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
         ???  This over-estimates cost for multiple uses and should be
         re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
        {
          tree op = gimple_op (stmt, i);
          enum vect_def_type dt;
          if (!op || op == lhs)
            continue;
          if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
              && (dt == vect_constant_def || dt == vect_external_def))
            prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
                                                            i, dt, cost_vec);
        }
    }
  else
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
                                       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr,
                                    stmt_vector_for_cost *cost_vec)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
                                       vec_promote_demote, stmt_info, 0,
                                       vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
        continue;
      if (gimple_return_retval (ret) == decl)
        return true;
      /* We often end up with an aggregate copy to the result decl,
         handle that case as well.  First skip intermediate clobbers
         though.  */
      gimple *def = ret;
      do
        {
          def = SSA_NAME_DEF_STMT (gimple_vuse (def));
        }
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
          && gimple_assign_lhs (def) == gimple_return_retval (ret)
          && gimple_assign_rhs1 (def) == decl)
        return true;
    }
  return false;
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt,
                       vect_memory_access_type memory_access_type,
                       vec_load_store_type vls_type, slp_tree slp_node,
                       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ???  Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
        prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
                                                        1, dt, cost_vec);
      else
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
          || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ???  Handle PARALLEL in some way.  */
      if (REG_P (reg))
        {
          int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
          /* Assume that a single reg-reg move is possible and cheap,
             do not account for vector to gp register move cost.  */
          if (nregs > 1)
            {
              /* Spill.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies,
                                                 vector_store,
                                                 stmt_info, 0, vect_epilogue);
              /* Loads.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
                                                 scalar_load,
                                                 stmt_info, 0, vect_epilogue);
            }
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr_info),
                                          vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
                      vect_memory_access_type memory_access_type,
                      slp_instance instance,
                      slp_tree slp_node,
                      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ???  Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
         the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
        = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
                                    slp_vf, instance, true,
                                    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
                                       first_stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
         redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
           i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
        bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
        {
          if (i % assumed_nunits == 0)
            {
              if (load_seen)
                ncopies++;
              load_seen = false;
            }
          if (bitmap_bit_p (perm, i))
            load_seen = true;
        }
      if (load_seen)
        ncopies++;
      gcc_assert (ncopies
                  <= (DR_GROUP_SIZE (first_stmt_info)
                      - DR_GROUP_GAP (first_stmt_info)
                      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
         for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
                        &inside_cost, &prologue_cost,
                        cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr_info),
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
                    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt_vinfo))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created new init_stmt: %G", new_stmt);
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
                  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (stmt_info, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else if (CONSTANT_CLASS_P (val))
            val = fold_convert (TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                init_stmt = gimple_build_assign (new_temp,
                                                 fold_build1 (VIEW_CONVERT_EXPR,
                                                              TREE_TYPE (type),
                                                              val));
              else
                init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt_info, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt_info, init_stmt, gsi);
  return new_temp;
}

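/* For example, initializing a V4SI vector from the scalar constant 5 goes
   through build_vector_from_val to form { 5, 5, 5, 5 } and emits a single
   "cst_N = { 5, 5, 5, 5 };" assignment, placed before GSI if GSI is given
   and on the loop preheader edge otherwise.  */
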
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
   with type DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
                                enum vect_def_type dt)
{
  tree vec_oprnd;
  stmt_vec_info vec_stmt_info;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* Operand is defined by a loop header phi.  In case of nested
       cycles we also may have uses of the backedge def.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
                  || dt == vect_nested_cycle);
      /* Fallthru.  */

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt_info
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt_info = (STMT_VINFO_VEC_STMT
                           (STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt_info);
        if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
          vec_oprnd = PHI_RESULT (phi);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a (vector) def
   that will be used in the vectorized stmt for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_vec_def_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
                                      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.  VINFO describes the vectorization.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
  if (!def_stmt_info)
    /* Do nothing; can reuse same def.  */
    return vec_oprnd;

  def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (def_stmt_info);
  if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
    vec_oprnd = PHI_RESULT (phi);
  else
    vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}

/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}

/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static stmt_vec_info
vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
{
  vec_info *vinfo = stmt_info->vinfo;

  stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
  if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);

  return vec_stmt_info;
}

1777 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1778 which sets the same scalar result as STMT_INFO did. Create and return a
1779 stmt_vec_info for VEC_STMT. */
1781 stmt_vec_info
1782 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1784 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1786 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1787 gsi_replace (&gsi, vec_stmt, true);
1789 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1792 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1793 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1795 stmt_vec_info
1796 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1797 gimple_stmt_iterator *gsi)
1799 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1801 if (!gsi_end_p (*gsi)
1802 && gimple_has_mem_ops (vec_stmt))
1804 gimple *at_stmt = gsi_stmt (*gsi);
1805 tree vuse = gimple_vuse (at_stmt);
1806 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1808 tree vdef = gimple_vdef (at_stmt);
1809 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1810 /* If we have an SSA vuse and insert a store, update virtual
1811 SSA form to avoid triggering the renamer. Do so only
1812 if we can easily see all uses - which is what almost always
1813 happens with the way vectorized stmts are inserted. */
1814 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1815 && ((is_gimple_assign (vec_stmt)
1816 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1817 || (is_gimple_call (vec_stmt)
1818 && !(gimple_call_flags (vec_stmt)
1819 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1821 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1822 gimple_set_vdef (vec_stmt, new_vdef);
1823 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1827 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1828 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1831 /* We want to vectorize a call to combined function CFN with function
1832 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1833 as the types of all inputs. Check whether this is possible using
1834 an internal function, returning its code if so or IFN_LAST if not. */
1836 static internal_fn
1837 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1838 tree vectype_out, tree vectype_in)
1840 internal_fn ifn;
1841 if (internal_fn_p (cfn))
1842 ifn = as_internal_fn (cfn);
1843 else
1844 ifn = associated_internal_fn (fndecl);
1845 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1847 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1848 if (info.vectorizable)
1850 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1851 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1852 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1853 OPTIMIZE_FOR_SPEED))
1854 return ifn;
1857 return IFN_LAST;
1861 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1862 gimple_stmt_iterator *);
1864 /* Check whether a load or store statement in the loop described by
1865 LOOP_VINFO is possible in a fully-masked loop. This is testing
1866 whether the vectorizer pass has the appropriate support, as well as
1867 whether the target does.
1869 VLS_TYPE says whether the statement is a load or store and VECTYPE
1870 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1871 says how the load or store is going to be implemented and GROUP_SIZE
1872 is the number of load or store statements in the containing group.
1873 If the access is a gather load or scatter store, GS_INFO describes
1874 its arguments.
1876 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1877 supported, otherwise record the required mask types. */
1879 static void
1880 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1881 vec_load_store_type vls_type, int group_size,
1882 vect_memory_access_type memory_access_type,
1883 gather_scatter_info *gs_info)
1885 /* Invariant loads need no special support. */
1886 if (memory_access_type == VMAT_INVARIANT)
1887 return;
1889 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1890 machine_mode vecmode = TYPE_MODE (vectype);
1891 bool is_load = (vls_type == VLS_LOAD);
1892 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1894 if (is_load
1895 ? !vect_load_lanes_supported (vectype, group_size, true)
1896 : !vect_store_lanes_supported (vectype, group_size, true))
1898 if (dump_enabled_p ())
1899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1900 "can't use a fully-masked loop because the"
1901 " target doesn't have an appropriate masked"
1902 " load/store-lanes instruction.\n");
1903 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1904 return;
1906 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1907 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1908 return;
1911 if (memory_access_type == VMAT_GATHER_SCATTER)
1913 internal_fn ifn = (is_load
1914 ? IFN_MASK_GATHER_LOAD
1915 : IFN_MASK_SCATTER_STORE);
1916 tree offset_type = TREE_TYPE (gs_info->offset);
1917 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1918 gs_info->memory_type,
1919 TYPE_SIGN (offset_type),
1920 gs_info->scale))
1922 if (dump_enabled_p ())
1923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1924 "can't use a fully-masked loop because the"
1925 " target doesn't have an appropriate masked"
1926 " gather load or scatter store instruction.\n");
1927 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1928 return;
1930 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1931 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1932 return;
1935 if (memory_access_type != VMAT_CONTIGUOUS
1936 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1938 /* Element X of the data must come from iteration i * VF + X of the
1939 scalar loop. We need more work to support other mappings. */
1940 if (dump_enabled_p ())
1941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1942 "can't use a fully-masked loop because an access"
1943 " isn't contiguous.\n");
1944 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1945 return;
1948 machine_mode mask_mode;
1949 if (!(targetm.vectorize.get_mask_mode
1950 (GET_MODE_NUNITS (vecmode),
1951 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1952 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1954 if (dump_enabled_p ())
1955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1956 "can't use a fully-masked loop because the target"
1957 " doesn't have the appropriate masked load or"
1958 " store.\n");
1959 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1960 return;
1962 /* We might load more scalars than we need for permuting SLP loads.
1963 We checked in get_group_load_store_type that the extra elements
1964 don't leak into a new vector. */
1965 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1966 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1967 unsigned int nvectors;
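/* For example (illustrative): with GROUP_SIZE == 3, a vectorization
   factor of 4 and 8 elements per vector, 3 * 4 == 12 scalar accesses
   need ceil (12 / 8) == 2 masks of VECTYPE per iteration. */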
1968 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1969 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1970 else
1971 gcc_unreachable ();
1974 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1975 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1976 that needs to be applied to all loads and stores in a vectorized loop.
1977 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1979 MASK_TYPE is the type of both masks. If new statements are needed,
1980 insert them before GSI. */
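/* For example (illustrative), with a nonnull LOOP_MASK this emits a single
   statement such as
     vec_mask_and_1 = vec_mask_2 & loop_mask_3;
   and returns its result; the SSA version numbers here are arbitrary. */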
1982 static tree
1983 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1984 gimple_stmt_iterator *gsi)
1986 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1987 if (!loop_mask)
1988 return vec_mask;
1990 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1991 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1992 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1993 vec_mask, loop_mask);
1994 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1995 return and_res;
1998 /* Determine whether we can use a gather load or scatter store to vectorize
1999 strided load or store STMT_INFO by truncating the current offset to a
2000 smaller width. We need to be able to construct an offset vector:
2002 { 0, X, X*2, X*3, ... }
2004 without loss of precision, where X is STMT_INFO's DR_STEP.
2006 Return true if this is possible, describing the gather load or scatter
2007 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
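/* A worked example (purely illustrative): with 32-bit elements
   (ELEMENT_BITS == 32), DR_STEP == 12 and at most 1000 latch iterations,
   trying SCALE == 4 gives a per-iteration offset of 12 / 4 == 3 and a
   largest offset of 1000 * 3 == 3000, which needs only 12 bits, so a
   32-bit offset vector { 0, 3, 6, ... } with scale 4 can be used,
   provided the target supports such a gather or scatter. */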
2009 static bool
2010 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2011 loop_vec_info loop_vinfo, bool masked_p,
2012 gather_scatter_info *gs_info)
2014 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2015 data_reference *dr = dr_info->dr;
2016 tree step = DR_STEP (dr);
2017 if (TREE_CODE (step) != INTEGER_CST)
2019 /* ??? Perhaps we could use range information here? */
2020 if (dump_enabled_p ())
2021 dump_printf_loc (MSG_NOTE, vect_location,
2022 "cannot truncate variable step.\n");
2023 return false;
2026 /* Get the number of bits in an element. */
2027 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2028 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2029 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2031 /* Set COUNT to one less than the upper limit on the number of elements,
2032 starting with the maximum vectorization factor. */
2033 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2035 /* Try lowering COUNT to the number of scalar latch iterations. */
2036 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2037 widest_int max_iters;
2038 if (max_loop_iterations (loop, &max_iters)
2039 && max_iters < count)
2040 count = max_iters.to_shwi ();
2042 /* Try scales of 1 and the element size. */
2043 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2044 wi::overflow_type overflow = wi::OVF_NONE;
2045 for (int i = 0; i < 2; ++i)
2047 int scale = scales[i];
2048 widest_int factor;
2049 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2050 continue;
2052 /* See whether we can calculate COUNT * STEP / SCALE, the largest
2053 offset needed, in ELEMENT_BITS bits. */
2054 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2055 if (overflow)
2056 continue;
2057 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2058 if (wi::min_precision (range, sign) > element_bits)
2060 overflow = wi::OVF_UNKNOWN;
2061 continue;
2064 /* See whether the target supports the operation. */
2065 tree memory_type = TREE_TYPE (DR_REF (dr));
2066 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2067 memory_type, element_bits, sign, scale,
2068 &gs_info->ifn, &gs_info->element_type))
2069 continue;
2071 tree offset_type = build_nonstandard_integer_type (element_bits,
2072 sign == UNSIGNED);
2074 gs_info->decl = NULL_TREE;
2075 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2076 but we don't need to store that here. */
2077 gs_info->base = NULL_TREE;
2078 gs_info->offset = fold_convert (offset_type, step);
2079 gs_info->offset_dt = vect_constant_def;
2080 gs_info->offset_vectype = NULL_TREE;
2081 gs_info->scale = scale;
2082 gs_info->memory_type = memory_type;
2083 return true;
2086 if (overflow && dump_enabled_p ())
2087 dump_printf_loc (MSG_NOTE, vect_location,
2088 "truncating gather/scatter offset to %d bits"
2089 " might change its value.\n", element_bits);
2091 return false;
2094 /* Return true if we can use gather/scatter internal functions to
2095 vectorize STMT_INFO, which is a grouped or strided load or store.
2096 MASKED_P is true if load or store is conditional. When returning
2097 true, fill in GS_INFO with the information required to perform the
2098 operation. */
2100 static bool
2101 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2102 loop_vec_info loop_vinfo, bool masked_p,
2103 gather_scatter_info *gs_info)
2105 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2106 || gs_info->decl)
2107 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2108 masked_p, gs_info);
2110 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2111 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2112 tree offset_type = TREE_TYPE (gs_info->offset);
2113 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2115 /* Enforced by vect_check_gather_scatter. */
2116 gcc_assert (element_bits >= offset_bits);
2118 /* If the elements are wider than the offset, convert the offset to the
2119 same width, without changing its sign. */
2120 if (element_bits > offset_bits)
2122 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2123 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2124 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2127 if (dump_enabled_p ())
2128 dump_printf_loc (MSG_NOTE, vect_location,
2129 "using gather/scatter for strided/grouped access,"
2130 " scale = %d\n", gs_info->scale);
2132 return true;
2135 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2136 elements with a known constant step. Return -1 if that step
2137 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2139 static int
2140 compare_step_with_zero (stmt_vec_info stmt_info)
2142 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2143 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2144 size_zero_node);
2147 /* If the target supports a permute mask that reverses the elements in
2148 a vector of type VECTYPE, return that mask, otherwise return null. */
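/* For example (illustrative), for a vector of 8 elements the builder below
   pushes { 7, 6, 5 }, a single stepped pattern that extends to the full
   reversal selector { 7, 6, 5, 4, 3, 2, 1, 0 }. */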
2150 static tree
2151 perm_mask_for_reverse (tree vectype)
2153 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2155 /* The encoding has a single stepped pattern. */
2156 vec_perm_builder sel (nunits, 1, 3);
2157 for (int i = 0; i < 3; ++i)
2158 sel.quick_push (nunits - 1 - i);
2160 vec_perm_indices indices (sel, 1, nunits);
2161 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2162 return NULL_TREE;
2163 return vect_gen_perm_mask_checked (vectype, indices);
2166 /* STMT_INFO is either a masked or unconditional store. Return the value
2167 being stored. */
2169 tree
2170 vect_get_store_rhs (stmt_vec_info stmt_info)
2172 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2174 gcc_assert (gimple_assign_single_p (assign));
2175 return gimple_assign_rhs1 (assign);
2177 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2179 internal_fn ifn = gimple_call_internal_fn (call);
2180 int index = internal_fn_stored_value_index (ifn);
2181 gcc_assert (index >= 0);
2182 return gimple_call_arg (call, index);
2184 gcc_unreachable ();
2187 /* A subroutine of get_load_store_type, with a subset of the same
2188 arguments. Handle the case where STMT_INFO is part of a grouped load
2189 or store.
2191 For stores, the statements in the group are all consecutive
2192 and there is no gap at the end. For loads, the statements in the
2193 group might not be consecutive; there can be gaps between statements
2194 as well as at the end. */
2196 static bool
2197 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2198 bool masked_p, vec_load_store_type vls_type,
2199 vect_memory_access_type *memory_access_type,
2200 gather_scatter_info *gs_info)
2202 vec_info *vinfo = stmt_info->vinfo;
2203 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2204 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2205 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2206 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2207 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2208 bool single_element_p = (stmt_info == first_stmt_info
2209 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2210 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2211 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2213 /* True if the vectorized statements would access beyond the last
2214 statement in the group. */
2215 bool overrun_p = false;
2217 /* True if we can cope with such overrun by peeling for gaps, so that
2218 there is at least one final scalar iteration after the vector loop. */
2219 bool can_overrun_p = (!masked_p
2220 && vls_type == VLS_LOAD
2221 && loop_vinfo
2222 && !loop->inner);
2224 /* There can only be a gap at the end of the group if the stride is
2225 known at compile time. */
2226 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2228 /* Stores can't yet have gaps. */
2229 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2231 if (slp)
2233 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2235 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2236 separated by the stride, until we have a complete vector.
2237 Fall back to scalar accesses if that isn't possible. */
2238 if (multiple_p (nunits, group_size))
2239 *memory_access_type = VMAT_STRIDED_SLP;
2240 else
2241 *memory_access_type = VMAT_ELEMENTWISE;
2243 else
2245 overrun_p = loop_vinfo && gap != 0;
2246 if (overrun_p && vls_type != VLS_LOAD)
2248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2249 "Grouped store with gaps requires"
2250 " non-consecutive accesses\n");
2251 return false;
2253 /* An overrun is fine if the trailing elements are smaller
2254 than the alignment boundary B. Every vector access will
2255 be a multiple of B and so we are guaranteed to access a
2256 non-gap element in the same B-sized block. */
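/* For example (illustrative): with a known 16-byte alignment and 4-byte
   scalars, 16 / 4 == 4, so a gap of up to 3 trailing elements never
   spills into an alignment block that contains no real group member. */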
2257 if (overrun_p
2258 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2259 / vect_get_scalar_dr_size (first_dr_info)))
2260 overrun_p = false;
2262 /* If the gap splits the vector in half and the target
2263 can do half-vector operations avoid the epilogue peeling
2264 by simply loading half of the vector only. Usually
2265 the construction with an upper zero half will be elided. */
2266 dr_alignment_support alignment_support_scheme;
2267 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2268 machine_mode vmode;
2269 if (overrun_p
2270 && !masked_p
2271 && (((alignment_support_scheme
2272 = vect_supportable_dr_alignment (first_dr_info, false)))
2273 == dr_aligned
2274 || alignment_support_scheme == dr_unaligned_supported)
2275 && known_eq (nunits, (group_size - gap) * 2)
2276 && known_eq (nunits, group_size)
2277 && mode_for_vector (elmode, (group_size - gap)).exists (&vmode)
2278 && VECTOR_MODE_P (vmode)
2279 && targetm.vector_mode_supported_p (vmode)
2280 && (convert_optab_handler (vec_init_optab,
2281 TYPE_MODE (vectype), vmode)
2282 != CODE_FOR_nothing))
2283 overrun_p = false;
2285 if (overrun_p && !can_overrun_p)
2287 if (dump_enabled_p ())
2288 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2289 "Peeling for outer loop is not supported\n");
2290 return false;
2292 *memory_access_type = VMAT_CONTIGUOUS;
2295 else
2297 /* We can always handle this case using elementwise accesses,
2298 but see if something more efficient is available. */
2299 *memory_access_type = VMAT_ELEMENTWISE;
2301 /* If there is a gap at the end of the group then these optimizations
2302 would access excess elements in the last iteration. */
2303 bool would_overrun_p = (gap != 0);
2304 /* An overrun is fine if the trailing elements are smaller than the
2305 alignment boundary B. Every vector access will be a multiple of B
2306 and so we are guaranteed to access a non-gap element in the
2307 same B-sized block. */
2308 if (would_overrun_p
2309 && !masked_p
2310 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2311 / vect_get_scalar_dr_size (first_dr_info)))
2312 would_overrun_p = false;
2314 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2315 && (can_overrun_p || !would_overrun_p)
2316 && compare_step_with_zero (stmt_info) > 0)
2318 /* First cope with the degenerate case of a single-element
2319 vector. */
2320 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2321 *memory_access_type = VMAT_CONTIGUOUS;
2323 /* Otherwise try using LOAD/STORE_LANES. */
2324 if (*memory_access_type == VMAT_ELEMENTWISE
2325 && (vls_type == VLS_LOAD
2326 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2327 : vect_store_lanes_supported (vectype, group_size,
2328 masked_p)))
2330 *memory_access_type = VMAT_LOAD_STORE_LANES;
2331 overrun_p = would_overrun_p;
2334 /* If that fails, try using permuting loads. */
2335 if (*memory_access_type == VMAT_ELEMENTWISE
2336 && (vls_type == VLS_LOAD
2337 ? vect_grouped_load_supported (vectype, single_element_p,
2338 group_size)
2339 : vect_grouped_store_supported (vectype, group_size)))
2341 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2342 overrun_p = would_overrun_p;
2346 /* As a last resort, try using a gather load or scatter store.
2348 ??? Although the code can handle all group sizes correctly,
2349 it probably isn't a win to use separate strided accesses based
2350 on nearby locations. Or, even if it's a win over scalar code,
2351 it might not be a win over vectorizing at a lower VF, if that
2352 allows us to use contiguous accesses. */
2353 if (*memory_access_type == VMAT_ELEMENTWISE
2354 && single_element_p
2355 && loop_vinfo
2356 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2357 masked_p, gs_info))
2358 *memory_access_type = VMAT_GATHER_SCATTER;
2361 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2363 /* STMT_INFO is the leader of the group. Check the operands of all the
2364 stmts of the group. */
2365 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2366 while (next_stmt_info)
2368 tree op = vect_get_store_rhs (next_stmt_info);
2369 enum vect_def_type dt;
2370 if (!vect_is_simple_use (op, vinfo, &dt))
2372 if (dump_enabled_p ())
2373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2374 "use not simple.\n");
2375 return false;
2377 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2381 if (overrun_p)
2383 gcc_assert (can_overrun_p);
2384 if (dump_enabled_p ())
2385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2386 "Data access with gaps requires scalar "
2387 "epilogue loop\n");
2388 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2391 return true;
2394 /* A subroutine of get_load_store_type, with a subset of the same
2395 arguments. Handle the case where STMT_INFO is a load or store that
2396 accesses consecutive elements with a negative step. */
2398 static vect_memory_access_type
2399 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2400 vec_load_store_type vls_type,
2401 unsigned int ncopies)
2403 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2404 dr_alignment_support alignment_support_scheme;
2406 if (ncopies > 1)
2408 if (dump_enabled_p ())
2409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2410 "multiple types with negative step.\n");
2411 return VMAT_ELEMENTWISE;
2414 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2415 if (alignment_support_scheme != dr_aligned
2416 && alignment_support_scheme != dr_unaligned_supported)
2418 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2420 "negative step but alignment required.\n");
2421 return VMAT_ELEMENTWISE;
2424 if (vls_type == VLS_STORE_INVARIANT)
2426 if (dump_enabled_p ())
2427 dump_printf_loc (MSG_NOTE, vect_location,
2428 "negative step with invariant source;"
2429 " no permute needed.\n");
2430 return VMAT_CONTIGUOUS_DOWN;
2433 if (!perm_mask_for_reverse (vectype))
2435 if (dump_enabled_p ())
2436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2437 "negative step and reversing not supported.\n");
2438 return VMAT_ELEMENTWISE;
2441 return VMAT_CONTIGUOUS_REVERSE;
2444 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2445 if there is a memory access type that the vectorized form can use,
2446 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2447 or scatters, fill in GS_INFO accordingly.
2449 SLP says whether we're performing SLP rather than loop vectorization.
2450 MASKED_P is true if the statement is conditional on a vectorized mask.
2451 VECTYPE is the vector type that the vectorized statements will use.
2452 NCOPIES is the number of vector statements that will be needed. */
2454 static bool
2455 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2456 bool masked_p, vec_load_store_type vls_type,
2457 unsigned int ncopies,
2458 vect_memory_access_type *memory_access_type,
2459 gather_scatter_info *gs_info)
2461 vec_info *vinfo = stmt_info->vinfo;
2462 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2463 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2464 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2466 *memory_access_type = VMAT_GATHER_SCATTER;
2467 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2468 gcc_unreachable ();
2469 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2470 &gs_info->offset_dt,
2471 &gs_info->offset_vectype))
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2475 "%s index use not simple.\n",
2476 vls_type == VLS_LOAD ? "gather" : "scatter");
2477 return false;
2480 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2482 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2483 vls_type, memory_access_type, gs_info))
2484 return false;
2486 else if (STMT_VINFO_STRIDED_P (stmt_info))
2488 gcc_assert (!slp);
2489 if (loop_vinfo
2490 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2491 masked_p, gs_info))
2492 *memory_access_type = VMAT_GATHER_SCATTER;
2493 else
2494 *memory_access_type = VMAT_ELEMENTWISE;
2496 else
2498 int cmp = compare_step_with_zero (stmt_info);
2499 if (cmp < 0)
2500 *memory_access_type = get_negative_load_store_type
2501 (stmt_info, vectype, vls_type, ncopies);
2502 else if (cmp == 0)
2504 gcc_assert (vls_type == VLS_LOAD);
2505 *memory_access_type = VMAT_INVARIANT;
2507 else
2508 *memory_access_type = VMAT_CONTIGUOUS;
2511 if ((*memory_access_type == VMAT_ELEMENTWISE
2512 || *memory_access_type == VMAT_STRIDED_SLP)
2513 && !nunits.is_constant ())
2515 if (dump_enabled_p ())
2516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2517 "Not using elementwise accesses due to variable "
2518 "vectorization factor.\n");
2519 return false;
2522 /* FIXME: At the moment the cost model seems to underestimate the
2523 cost of using elementwise accesses. This check preserves the
2524 traditional behavior until that can be fixed. */
2525 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2526 if (!first_stmt_info)
2527 first_stmt_info = stmt_info;
2528 if (*memory_access_type == VMAT_ELEMENTWISE
2529 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2530 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2531 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2532 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2534 if (dump_enabled_p ())
2535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2536 "not falling back to elementwise accesses\n");
2537 return false;
2539 return true;
2542 /* Return true if boolean argument MASK is suitable for vectorizing
2543 conditional load or store STMT_INFO. When returning true, store the type
2544 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2545 in *MASK_VECTYPE_OUT. */
2547 static bool
2548 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2549 vect_def_type *mask_dt_out,
2550 tree *mask_vectype_out)
2552 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2554 if (dump_enabled_p ())
2555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2556 "mask argument is not a boolean.\n");
2557 return false;
2560 if (TREE_CODE (mask) != SSA_NAME)
2562 if (dump_enabled_p ())
2563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2564 "mask argument is not an SSA name.\n");
2565 return false;
2568 enum vect_def_type mask_dt;
2569 tree mask_vectype;
2570 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2572 if (dump_enabled_p ())
2573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2574 "mask use not simple.\n");
2575 return false;
2578 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2579 if (!mask_vectype)
2580 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2582 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2584 if (dump_enabled_p ())
2585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2586 "could not find an appropriate vector mask type.\n");
2587 return false;
2590 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2591 TYPE_VECTOR_SUBPARTS (vectype)))
2593 if (dump_enabled_p ())
2594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2595 "vector mask type %T",
2596 " does not match vector data type %T.\n",
2597 mask_vectype, vectype);
2599 return false;
2602 *mask_dt_out = mask_dt;
2603 *mask_vectype_out = mask_vectype;
2604 return true;
2607 /* Return true if stored value RHS is suitable for vectorizing store
2608 statement STMT_INFO. When returning true, store the type of the
2609 definition in *RHS_DT_OUT, the type of the vectorized store value in
2610 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2612 static bool
2613 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2614 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2615 vec_load_store_type *vls_type_out)
2617 /* In the case this is a store from a constant make sure
2618 native_encode_expr can handle it. */
2619 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2621 if (dump_enabled_p ())
2622 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2623 "cannot encode constant as a byte sequence.\n");
2624 return false;
2627 enum vect_def_type rhs_dt;
2628 tree rhs_vectype;
2629 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2631 if (dump_enabled_p ())
2632 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2633 "use not simple.\n");
2634 return false;
2637 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2638 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2640 if (dump_enabled_p ())
2641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2642 "incompatible vector types.\n");
2643 return false;
2646 *rhs_dt_out = rhs_dt;
2647 *rhs_vectype_out = rhs_vectype;
2648 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2649 *vls_type_out = VLS_STORE_INVARIANT;
2650 else
2651 *vls_type_out = VLS_STORE;
2652 return true;
2655 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2656 Note that we support masks with floating-point type, in which case the
2657 floats are interpreted as a bitmask. */
2659 static tree
2660 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2662 if (TREE_CODE (masktype) == INTEGER_TYPE)
2663 return build_int_cst (masktype, -1);
2664 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2666 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2667 mask = build_vector_from_val (masktype, mask);
2668 return vect_init_vector (stmt_info, mask, masktype, NULL);
2670 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2672 REAL_VALUE_TYPE r;
2673 long tmp[6];
2674 for (int j = 0; j < 6; ++j)
2675 tmp[j] = -1;
2676 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2677 tree mask = build_real (TREE_TYPE (masktype), r);
2678 mask = build_vector_from_val (masktype, mask);
2679 return vect_init_vector (stmt_info, mask, masktype, NULL);
2681 gcc_unreachable ();
2684 /* Build an all-zero merge value of type VECTYPE while vectorizing
2685 STMT_INFO as a gather load. */
2687 static tree
2688 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2690 tree merge;
2691 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2692 merge = build_int_cst (TREE_TYPE (vectype), 0);
2693 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2695 REAL_VALUE_TYPE r;
2696 long tmp[6];
2697 for (int j = 0; j < 6; ++j)
2698 tmp[j] = 0;
2699 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2700 merge = build_real (TREE_TYPE (vectype), r);
2702 else
2703 gcc_unreachable ();
2704 merge = build_vector_from_val (vectype, merge);
2705 return vect_init_vector (stmt_info, merge, vectype, NULL);
2708 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2709 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2710 the gather load operation. If the load is conditional, MASK is the
2711 unvectorized condition and MASK_DT is its definition type, otherwise
2712 MASK is null. */
2714 static void
2715 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2716 gimple_stmt_iterator *gsi,
2717 stmt_vec_info *vec_stmt,
2718 gather_scatter_info *gs_info,
2719 tree mask)
2721 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2722 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2723 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2724 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2725 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2726 edge pe = loop_preheader_edge (loop);
2727 enum { NARROW, NONE, WIDEN } modifier;
2728 poly_uint64 gather_off_nunits
2729 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2731 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2732 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2733 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2734 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2735 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2736 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2737 tree scaletype = TREE_VALUE (arglist);
2738 tree real_masktype = masktype;
2739 gcc_checking_assert (types_compatible_p (srctype, rettype)
2740 && (!mask
2741 || TREE_CODE (masktype) == INTEGER_TYPE
2742 || types_compatible_p (srctype, masktype)));
2743 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2744 masktype = build_same_sized_truth_vector_type (srctype);
2746 tree mask_halftype = masktype;
2747 tree perm_mask = NULL_TREE;
2748 tree mask_perm_mask = NULL_TREE;
2749 if (known_eq (nunits, gather_off_nunits))
2750 modifier = NONE;
2751 else if (known_eq (nunits * 2, gather_off_nunits))
2753 modifier = WIDEN;
2755 /* Currently widening gathers and scatters are only supported for
2756 fixed-length vectors. */
2757 int count = gather_off_nunits.to_constant ();
2758 vec_perm_builder sel (count, count, 1);
2759 for (int i = 0; i < count; ++i)
2760 sel.quick_push (i | (count / 2));
2762 vec_perm_indices indices (sel, 1, count);
2763 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2764 indices);
2766 else if (known_eq (nunits, gather_off_nunits * 2))
2768 modifier = NARROW;
2770 /* Currently narrowing gathers and scatters are only supported for
2771 fixed-length vectors. */
2772 int count = nunits.to_constant ();
2773 vec_perm_builder sel (count, count, 1);
2774 sel.quick_grow (count);
2775 for (int i = 0; i < count; ++i)
2776 sel[i] = i < count / 2 ? i : i + count / 2;
2777 vec_perm_indices indices (sel, 2, count);
2778 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2780 ncopies *= 2;
2782 if (mask && masktype == real_masktype)
2784 for (int i = 0; i < count; ++i)
2785 sel[i] = i | (count / 2);
2786 indices.new_vector (sel, 2, count);
2787 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2789 else if (mask)
2790 mask_halftype
2791 = build_same_sized_truth_vector_type (gs_info->offset_vectype);
2793 else
2794 gcc_unreachable ();
2796 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2797 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2799 tree ptr = fold_convert (ptrtype, gs_info->base);
2800 if (!is_gimple_min_invariant (ptr))
2802 gimple_seq seq;
2803 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2804 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2805 gcc_assert (!new_bb);
2808 tree scale = build_int_cst (scaletype, gs_info->scale);
2810 tree vec_oprnd0 = NULL_TREE;
2811 tree vec_mask = NULL_TREE;
2812 tree src_op = NULL_TREE;
2813 tree mask_op = NULL_TREE;
2814 tree prev_res = NULL_TREE;
2815 stmt_vec_info prev_stmt_info = NULL;
2817 if (!mask)
2819 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2820 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2823 for (int j = 0; j < ncopies; ++j)
2825 tree op, var;
2826 if (modifier == WIDEN && (j & 1))
2827 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2828 perm_mask, stmt_info, gsi);
2829 else if (j == 0)
2830 op = vec_oprnd0
2831 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2832 else
2833 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2834 vec_oprnd0);
2836 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2838 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2839 TYPE_VECTOR_SUBPARTS (idxtype)));
2840 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2841 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2842 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2843 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2844 op = var;
2847 if (mask)
2849 if (mask_perm_mask && (j & 1))
2850 mask_op = permute_vec_elements (mask_op, mask_op,
2851 mask_perm_mask, stmt_info, gsi);
2852 else
2854 if (j == 0)
2855 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2856 else if (modifier != NARROW || (j & 1) == 0)
2857 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2858 vec_mask);
2860 mask_op = vec_mask;
2861 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2863 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2864 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2865 gcc_assert (known_eq (sub1, sub2));
2866 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2867 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2868 gassign *new_stmt
2869 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2870 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2871 mask_op = var;
2874 if (modifier == NARROW && masktype != real_masktype)
2876 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2877 gassign *new_stmt
2878 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2879 : VEC_UNPACK_LO_EXPR,
2880 mask_op);
2881 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2882 mask_op = var;
2884 src_op = mask_op;
2887 tree mask_arg = mask_op;
2888 if (masktype != real_masktype)
2890 tree utype, optype = TREE_TYPE (mask_op);
2891 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2892 utype = real_masktype;
2893 else
2894 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2895 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2896 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2897 gassign *new_stmt
2898 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2899 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2900 mask_arg = var;
2901 if (!useless_type_conversion_p (real_masktype, utype))
2903 gcc_assert (TYPE_PRECISION (utype)
2904 <= TYPE_PRECISION (real_masktype));
2905 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2906 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2907 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2908 mask_arg = var;
2910 src_op = build_zero_cst (srctype);
2912 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2913 mask_arg, scale);
2915 stmt_vec_info new_stmt_info;
2916 if (!useless_type_conversion_p (vectype, rettype))
2918 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2919 TYPE_VECTOR_SUBPARTS (rettype)));
2920 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2921 gimple_call_set_lhs (new_call, op);
2922 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2923 var = make_ssa_name (vec_dest);
2924 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2925 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2926 new_stmt_info
2927 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2929 else
2931 var = make_ssa_name (vec_dest, new_call);
2932 gimple_call_set_lhs (new_call, var);
2933 new_stmt_info
2934 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2937 if (modifier == NARROW)
2939 if ((j & 1) == 0)
2941 prev_res = var;
2942 continue;
2944 var = permute_vec_elements (prev_res, var, perm_mask,
2945 stmt_info, gsi);
2946 new_stmt_info = loop_vinfo->lookup_def (var);
2949 if (prev_stmt_info == NULL)
2950 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2951 else
2952 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2953 prev_stmt_info = new_stmt_info;
2957 /* Prepare the base and offset in GS_INFO for vectorization.
2958 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2959 to the vectorized offset argument for the first copy of STMT_INFO.
2960 STMT_INFO is the statement described by GS_INFO and LOOP is the
2961 containing loop. */
2963 static void
2964 vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
2965 gather_scatter_info *gs_info,
2966 tree *dataref_ptr, tree *vec_offset)
2968 gimple_seq stmts = NULL;
2969 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2970 if (stmts != NULL)
2972 basic_block new_bb;
2973 edge pe = loop_preheader_edge (loop);
2974 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2975 gcc_assert (!new_bb);
2977 tree offset_type = TREE_TYPE (gs_info->offset);
2978 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2979 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2980 offset_vectype);
2983 /* Prepare to implement a grouped or strided load or store using
2984 the gather load or scatter store operation described by GS_INFO.
2985 STMT_INFO is the load or store statement.
2987 Set *DATAREF_BUMP to the amount that should be added to the base
2988 address after each copy of the vectorized statement. Set *VEC_OFFSET
2989 to an invariant offset vector in which element I has the value
2990 I * DR_STEP / SCALE. */
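/* A worked example (purely illustrative): with DR_STEP == 8, SCALE == 4
   and four elements per vector, *VEC_OFFSET becomes { 0, 2, 4, 6 } and
   *DATAREF_BUMP is 8 * 4 == 32 bytes per copy of the statement. */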
2992 static void
2993 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2994 loop_vec_info loop_vinfo,
2995 gather_scatter_info *gs_info,
2996 tree *dataref_bump, tree *vec_offset)
2998 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2999 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3000 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3001 gimple_seq stmts;
3003 tree bump = size_binop (MULT_EXPR,
3004 fold_convert (sizetype, DR_STEP (dr)),
3005 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3006 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
3007 if (stmts)
3008 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3010 /* The offset given in GS_INFO can have pointer type, so use the element
3011 type of the vector instead. */
3012 tree offset_type = TREE_TYPE (gs_info->offset);
3013 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
3014 offset_type = TREE_TYPE (offset_vectype);
3016 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3017 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
3018 ssize_int (gs_info->scale));
3019 step = fold_convert (offset_type, step);
3020 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
3022 /* Create {0, X, X*2, X*3, ...}. */
3023 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
3024 build_zero_cst (offset_type), step);
3025 if (stmts)
3026 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3029 /* Return the amount that should be added to a vector pointer to move
3030 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3031 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3032 vectorization. */
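/* For example (illustrative), for an aggregate type of four 4-byte elements
   the increment is 16 bytes, negated to -16 when the data reference steps
   backwards through memory. */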
3034 static tree
3035 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3036 vect_memory_access_type memory_access_type)
3038 if (memory_access_type == VMAT_INVARIANT)
3039 return size_zero_node;
3041 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3042 tree step = vect_dr_behavior (dr_info)->step;
3043 if (tree_int_cst_sgn (step) == -1)
3044 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3045 return iv_step;
3048 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3050 static bool
3051 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3052 stmt_vec_info *vec_stmt, slp_tree slp_node,
3053 tree vectype_in, stmt_vector_for_cost *cost_vec)
3055 tree op, vectype;
3056 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3057 vec_info *vinfo = stmt_info->vinfo;
3058 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3059 unsigned ncopies;
3061 op = gimple_call_arg (stmt, 0);
3062 vectype = STMT_VINFO_VECTYPE (stmt_info);
3063 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3065 /* Multiple types in SLP are handled by creating the appropriate number of
3066 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3067 case of SLP. */
3068 if (slp_node)
3069 ncopies = 1;
3070 else
3071 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3073 gcc_assert (ncopies >= 1);
3075 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3076 if (! char_vectype)
3077 return false;
3079 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3080 unsigned word_bytes;
3081 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3082 return false;
3084 /* The encoding uses one stepped pattern for each byte in the word. */
3085 vec_perm_builder elts (num_bytes, word_bytes, 3);
3086 for (unsigned i = 0; i < 3; ++i)
3087 for (unsigned j = 0; j < word_bytes; ++j)
3088 elts.quick_push ((i + 1) * word_bytes - j - 1);
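/* For example (illustrative), for bswap32 on 16 bytes (WORD_BYTES == 4,
   NUM_BYTES == 16) the three encoded words above are
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, which the builder extends to
   the full byte-reversing selector { 3, 2, 1, 0, ..., 15, 14, 13, 12 }. */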
3090 vec_perm_indices indices (elts, 1, num_bytes);
3091 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3092 return false;
3094 if (! vec_stmt)
3096 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3097 DUMP_VECT_SCOPE ("vectorizable_bswap");
3098 if (! slp_node)
3100 record_stmt_cost (cost_vec,
3101 1, vector_stmt, stmt_info, 0, vect_prologue);
3102 record_stmt_cost (cost_vec,
3103 ncopies, vec_perm, stmt_info, 0, vect_body);
3105 return true;
3108 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3110 /* Transform. */
3111 vec<tree> vec_oprnds = vNULL;
3112 stmt_vec_info new_stmt_info = NULL;
3113 stmt_vec_info prev_stmt_info = NULL;
3114 for (unsigned j = 0; j < ncopies; j++)
3116 /* Handle uses. */
3117 if (j == 0)
3118 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3119 else
3120 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3122 /* Arguments are ready. Create the new vector stmt. */
3123 unsigned i;
3124 tree vop;
3125 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3127 gimple *new_stmt;
3128 tree tem = make_ssa_name (char_vectype);
3129 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3130 char_vectype, vop));
3131 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3132 tree tem2 = make_ssa_name (char_vectype);
3133 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3134 tem, tem, bswap_vconst);
3135 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3136 tem = make_ssa_name (vectype);
3137 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3138 vectype, tem2));
3139 new_stmt_info
3140 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3141 if (slp_node)
3142 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3145 if (slp_node)
3146 continue;
3148 if (j == 0)
3149 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3150 else
3151 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3153 prev_stmt_info = new_stmt_info;
3156 vec_oprnds.release ();
3157 return true;
3160 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3161 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3162 in a single step. On success, store the binary pack code in
3163 *CONVERT_CODE. */
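/* For example (illustrative), narrowing a vector of int to a vector of
   short with twice as many elements is a single-step conversion, typically
   VEC_PACK_TRUNC_EXPR; narrowing int to char needs two steps and is
   rejected here. */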
3165 static bool
3166 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3167 tree_code *convert_code)
3169 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3170 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3171 return false;
3173 tree_code code;
3174 int multi_step_cvt = 0;
3175 auto_vec <tree, 8> interm_types;
3176 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3177 &code, &multi_step_cvt,
3178 &interm_types)
3179 || multi_step_cvt)
3180 return false;
3182 *convert_code = code;
3183 return true;
3186 /* Function vectorizable_call.
3188 Check if STMT_INFO performs a function call that can be vectorized.
3189 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3190 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3191 Return true if STMT_INFO is vectorizable in this way. */
3193 static bool
3194 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3195 stmt_vec_info *vec_stmt, slp_tree slp_node,
3196 stmt_vector_for_cost *cost_vec)
3198 gcall *stmt;
3199 tree vec_dest;
3200 tree scalar_dest;
3201 tree op;
3202 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3203 stmt_vec_info prev_stmt_info;
3204 tree vectype_out, vectype_in;
3205 poly_uint64 nunits_in;
3206 poly_uint64 nunits_out;
3207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3208 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3209 vec_info *vinfo = stmt_info->vinfo;
3210 tree fndecl, new_temp, rhs_type;
3211 enum vect_def_type dt[4]
3212 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3213 vect_unknown_def_type };
3214 tree vectypes[ARRAY_SIZE (dt)] = {};
3215 int ndts = ARRAY_SIZE (dt);
3216 int ncopies, j;
3217 auto_vec<tree, 8> vargs;
3218 auto_vec<tree, 8> orig_vargs;
3219 enum { NARROW, NONE, WIDEN } modifier;
3220 size_t i, nargs;
3221 tree lhs;
3223 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3224 return false;
3226 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3227 && ! vec_stmt)
3228 return false;
3230 /* Is STMT_INFO a vectorizable call? */
3231 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3232 if (!stmt)
3233 return false;
3235 if (gimple_call_internal_p (stmt)
3236 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3237 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3238 /* Handled by vectorizable_load and vectorizable_store. */
3239 return false;
3241 if (gimple_call_lhs (stmt) == NULL_TREE
3242 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3243 return false;
3245 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3247 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3249 /* Process function arguments. */
3250 rhs_type = NULL_TREE;
3251 vectype_in = NULL_TREE;
3252 nargs = gimple_call_num_args (stmt);
3254 /* Bail out if the function has more than four arguments; we do not have
3255 interesting builtin functions to vectorize with more than two arguments
3256 except for fma. No arguments is also not good. */
3257 if (nargs == 0 || nargs > 4)
3258 return false;
3260 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
3261 combined_fn cfn = gimple_call_combined_fn (stmt);
3262 if (cfn == CFN_GOMP_SIMD_LANE)
3264 nargs = 0;
3265 rhs_type = unsigned_type_node;
3268 int mask_opno = -1;
3269 if (internal_fn_p (cfn))
3270 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3272 for (i = 0; i < nargs; i++)
3274 op = gimple_call_arg (stmt, i);
3275 if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3277 if (dump_enabled_p ())
3278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3279 "use not simple.\n");
3280 return false;
3283 /* Skip the mask argument to an internal function. This operand
3284 has been converted via a pattern if necessary. */
3285 if ((int) i == mask_opno)
3286 continue;
3288 /* We can only handle calls with arguments of the same type. */
3289 if (rhs_type
3290 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3294 "argument types differ.\n");
3295 return false;
3297 if (!rhs_type)
3298 rhs_type = TREE_TYPE (op);
3300 if (!vectype_in)
3301 vectype_in = vectypes[i];
3302 else if (vectypes[i]
3303 && vectypes[i] != vectype_in)
3305 if (dump_enabled_p ())
3306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3307 "argument vector types differ.\n");
3308 return false;
3311 /* If all arguments are external or constant defs use a vector type with
3312 the same size as the output vector type. */
3313 if (!vectype_in)
3314 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3315 if (vec_stmt)
3316 gcc_assert (vectype_in);
3317 if (!vectype_in)
3319 if (dump_enabled_p ())
3320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3321 "no vectype for scalar type %T\n", rhs_type);
3323 return false;
3326 /* FORNOW */
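/* For example (illustrative): vector(4) int arguments with a vector(8)
   short int result give nunits_out == 2 * nunits_in and hence the NARROW
   modifier, so the results of two vectorized calls are combined into each
   output vector. */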
3327 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3328 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3329 if (known_eq (nunits_in * 2, nunits_out))
3330 modifier = NARROW;
3331 else if (known_eq (nunits_out, nunits_in))
3332 modifier = NONE;
3333 else if (known_eq (nunits_out * 2, nunits_in))
3334 modifier = WIDEN;
3335 else
3336 return false;
3338 /* We only handle functions that do not read or clobber memory. */
3339 if (gimple_vuse (stmt))
3341 if (dump_enabled_p ())
3342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3343 "function reads from or writes to memory.\n");
3344 return false;
3347 /* For now, we only vectorize functions if a target specific builtin
3348 is available. TODO -- in some cases, it might be profitable to
3349 insert the calls for pieces of the vector, in order to be able
3350 to vectorize other operations in the loop. */
3351 fndecl = NULL_TREE;
3352 internal_fn ifn = IFN_LAST;
3353 tree callee = gimple_call_fndecl (stmt);
3355 /* First try using an internal function. */
3356 tree_code convert_code = ERROR_MARK;
3357 if (cfn != CFN_LAST
3358 && (modifier == NONE
3359 || (modifier == NARROW
3360 && simple_integer_narrowing (vectype_out, vectype_in,
3361 &convert_code))))
3362 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3363 vectype_in);
3365 /* If that fails, try asking for a target-specific built-in function. */
3366 if (ifn == IFN_LAST)
3368 if (cfn != CFN_LAST)
3369 fndecl = targetm.vectorize.builtin_vectorized_function
3370 (cfn, vectype_out, vectype_in);
3371 else if (callee)
3372 fndecl = targetm.vectorize.builtin_md_vectorized_function
3373 (callee, vectype_out, vectype_in);
3376 if (ifn == IFN_LAST && !fndecl)
3378 if (cfn == CFN_GOMP_SIMD_LANE
3379 && !slp_node
3380 && loop_vinfo
3381 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3382 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3383 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3384 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3386 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3387 { 0, 1, 2, ... vf - 1 } vector. */
3388 gcc_assert (nargs == 0);
3390 else if (modifier == NONE
3391 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3392 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3393 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3394 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3395 vectype_in, cost_vec);
3396 else
3398 if (dump_enabled_p ())
3399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3400 "function is not vectorizable.\n");
3401 return false;
3405 if (slp_node)
3406 ncopies = 1;
3407 else if (modifier == NARROW && ifn == IFN_LAST)
3408 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3409 else
3410 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3412 /* Sanity check: make sure that at least one copy of the vectorized stmt
3413 needs to be generated. */
3414 gcc_assert (ncopies >= 1);
3416 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3417 if (!vec_stmt) /* transformation not required. */
3419 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3420 DUMP_VECT_SCOPE ("vectorizable_call");
3421 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3422 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3423 record_stmt_cost (cost_vec, ncopies / 2,
3424 vec_promote_demote, stmt_info, 0, vect_body);
3426 if (loop_vinfo && mask_opno >= 0)
3428 unsigned int nvectors = (slp_node
3429 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3430 : ncopies);
3431 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3433 return true;
3436 /* Transform. */
3438 if (dump_enabled_p ())
3439 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3441 /* Handle def. */
3442 scalar_dest = gimple_call_lhs (stmt);
3443 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3445 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3447 stmt_vec_info new_stmt_info = NULL;
3448 prev_stmt_info = NULL;
3449 if (modifier == NONE || ifn != IFN_LAST)
3451 tree prev_res = NULL_TREE;
3452 vargs.safe_grow (nargs);
3453 orig_vargs.safe_grow (nargs);
3454 for (j = 0; j < ncopies; ++j)
3456 /* Build argument list for the vectorized call. */
3457 if (slp_node)
3459 auto_vec<vec<tree> > vec_defs (nargs);
3460 vec<tree> vec_oprnds0;
3462 for (i = 0; i < nargs; i++)
3463 vargs[i] = gimple_call_arg (stmt, i);
3464 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3465 vec_oprnds0 = vec_defs[0];
3467 /* Arguments are ready. Create the new vector stmt. */
3468 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3470 size_t k;
3471 for (k = 0; k < nargs; k++)
3473 vec<tree> vec_oprndsk = vec_defs[k];
3474 vargs[k] = vec_oprndsk[i];
3476 if (modifier == NARROW)
3478 /* We don't define any narrowing conditional functions
3479 at present. */
3480 gcc_assert (mask_opno < 0);
3481 tree half_res = make_ssa_name (vectype_in);
3482 gcall *call
3483 = gimple_build_call_internal_vec (ifn, vargs);
3484 gimple_call_set_lhs (call, half_res);
3485 gimple_call_set_nothrow (call, true);
3486 new_stmt_info
3487 = vect_finish_stmt_generation (stmt_info, call, gsi);
3488 if ((i & 1) == 0)
3490 prev_res = half_res;
3491 continue;
3493 new_temp = make_ssa_name (vec_dest);
3494 gimple *new_stmt
3495 = gimple_build_assign (new_temp, convert_code,
3496 prev_res, half_res);
3497 new_stmt_info
3498 = vect_finish_stmt_generation (stmt_info, new_stmt,
3499 gsi);
3501 else
3503 if (mask_opno >= 0 && masked_loop_p)
3505 unsigned int vec_num = vec_oprnds0.length ();
3506 /* Always true for SLP. */
3507 gcc_assert (ncopies == 1);
3508 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3509 vectype_out, i);
3510 vargs[mask_opno] = prepare_load_store_mask
3511 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3514 gcall *call;
3515 if (ifn != IFN_LAST)
3516 call = gimple_build_call_internal_vec (ifn, vargs);
3517 else
3518 call = gimple_build_call_vec (fndecl, vargs);
3519 new_temp = make_ssa_name (vec_dest, call);
3520 gimple_call_set_lhs (call, new_temp);
3521 gimple_call_set_nothrow (call, true);
3522 new_stmt_info
3523 = vect_finish_stmt_generation (stmt_info, call, gsi);
3525 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3528 for (i = 0; i < nargs; i++)
3530 vec<tree> vec_oprndsi = vec_defs[i];
3531 vec_oprndsi.release ();
3533 continue;
3536 if (mask_opno >= 0 && !vectypes[mask_opno])
3538 gcc_assert (modifier != WIDEN);
3539 vectypes[mask_opno]
3540 = build_same_sized_truth_vector_type (vectype_in);
3543 for (i = 0; i < nargs; i++)
3545 op = gimple_call_arg (stmt, i);
3546 if (j == 0)
3547 vec_oprnd0
3548 = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3549 else
3550 vec_oprnd0
3551 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3553 orig_vargs[i] = vargs[i] = vec_oprnd0;
3556 if (mask_opno >= 0 && masked_loop_p)
3558 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3559 vectype_out, j);
3560 vargs[mask_opno]
3561 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3562 vargs[mask_opno], gsi);
3565 if (cfn == CFN_GOMP_SIMD_LANE)
3567 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3568 tree new_var
3569 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3570 gimple *init_stmt = gimple_build_assign (new_var, cst);
3571 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3572 new_temp = make_ssa_name (vec_dest);
3573 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3574 new_stmt_info
3575 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3577 else if (modifier == NARROW)
3579 /* We don't define any narrowing conditional functions at
3580 present. */
3581 gcc_assert (mask_opno < 0);
3582 tree half_res = make_ssa_name (vectype_in);
3583 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3584 gimple_call_set_lhs (call, half_res);
3585 gimple_call_set_nothrow (call, true);
3586 new_stmt_info
3587 = vect_finish_stmt_generation (stmt_info, call, gsi);
3588 if ((j & 1) == 0)
3590 prev_res = half_res;
3591 continue;
3593 new_temp = make_ssa_name (vec_dest);
3594 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3595 prev_res, half_res);
3596 new_stmt_info
3597 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3599 else
3601 gcall *call;
3602 if (ifn != IFN_LAST)
3603 call = gimple_build_call_internal_vec (ifn, vargs);
3604 else
3605 call = gimple_build_call_vec (fndecl, vargs);
3606 new_temp = make_ssa_name (vec_dest, call);
3607 gimple_call_set_lhs (call, new_temp);
3608 gimple_call_set_nothrow (call, true);
3609 new_stmt_info
3610 = vect_finish_stmt_generation (stmt_info, call, gsi);
3613 if (j == (modifier == NARROW ? 1 : 0))
3614 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3615 else
3616 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3618 prev_stmt_info = new_stmt_info;
3621 else if (modifier == NARROW)
3623 /* We don't define any narrowing conditional functions at present. */
3624 gcc_assert (mask_opno < 0);
3625 for (j = 0; j < ncopies; ++j)
3627 /* Build argument list for the vectorized call. */
3628 if (j == 0)
3629 vargs.create (nargs * 2);
3630 else
3631 vargs.truncate (0);
3633 if (slp_node)
3635 auto_vec<vec<tree> > vec_defs (nargs);
3636 vec<tree> vec_oprnds0;
3638 for (i = 0; i < nargs; i++)
3639 vargs.quick_push (gimple_call_arg (stmt, i));
3640 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3641 vec_oprnds0 = vec_defs[0];
3643 /* Arguments are ready. Create the new vector stmt. */
3644 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3646 size_t k;
3647 vargs.truncate (0);
3648 for (k = 0; k < nargs; k++)
3650 vec<tree> vec_oprndsk = vec_defs[k];
3651 vargs.quick_push (vec_oprndsk[i]);
3652 vargs.quick_push (vec_oprndsk[i + 1]);
3654 gcall *call;
3655 if (ifn != IFN_LAST)
3656 call = gimple_build_call_internal_vec (ifn, vargs);
3657 else
3658 call = gimple_build_call_vec (fndecl, vargs);
3659 new_temp = make_ssa_name (vec_dest, call);
3660 gimple_call_set_lhs (call, new_temp);
3661 gimple_call_set_nothrow (call, true);
3662 new_stmt_info
3663 = vect_finish_stmt_generation (stmt_info, call, gsi);
3664 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3667 for (i = 0; i < nargs; i++)
3669 vec<tree> vec_oprndsi = vec_defs[i];
3670 vec_oprndsi.release ();
3672 continue;
3675 for (i = 0; i < nargs; i++)
3677 op = gimple_call_arg (stmt, i);
3678 if (j == 0)
3680 vec_oprnd0
3681 = vect_get_vec_def_for_operand (op, stmt_info,
3682 vectypes[i]);
3683 vec_oprnd1
3684 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3686 else
3688 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3689 2 * i + 1);
3690 vec_oprnd0
3691 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3692 vec_oprnd1
3693 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3696 vargs.quick_push (vec_oprnd0);
3697 vargs.quick_push (vec_oprnd1);
3700 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3701 new_temp = make_ssa_name (vec_dest, new_stmt);
3702 gimple_call_set_lhs (new_stmt, new_temp);
3703 new_stmt_info
3704 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3706 if (j == 0)
3707 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3708 else
3709 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3711 prev_stmt_info = new_stmt_info;
3714 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3716 else
3717 /* No current target implements this case. */
3718 return false;
3720 vargs.release ();
3722 /* The call in STMT might prevent it from being removed in dce.
3723 We cannot, however, remove it here, due to the way the SSA name
3724 it defines is mapped to the new definition. So just replace the
3725 rhs of the statement with something harmless. */
3727 if (slp_node)
3728 return true;
3730 stmt_info = vect_orig_stmt (stmt_info);
3731 lhs = gimple_get_lhs (stmt_info->stmt);
3733 gassign *new_stmt
3734 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3735 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3737 return true;
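/* Rough example of the NONE-modifier path above: a scalar call such as
     t_1 = __builtin_sqrtf (x_2);
   in a vectorized loop becomes, per copy, either an internal-function call
     vect_t_1 = .SQRT (vect_x_2);
   when vectorizable_internal_function finds a supported IFN, or a call to
   the target-specific built-in returned by
   targetm.vectorize.builtin_vectorized_function.  */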
3741 struct simd_call_arg_info
3743 tree vectype;
3744 tree op;
3745 HOST_WIDE_INT linear_step;
3746 enum vect_def_type dt;
3747 unsigned int align;
3748 bool simd_lane_linear;
3751 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3752 is linear within a simd lane (but not within the whole loop), note it
3753 in *ARGINFO. */
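/* For example, inside an "omp simd" loop the address of a lane-private
   array element is typically computed roughly as
     _1 = .GOMP_SIMD_LANE (simduid.0_5);
     _2 = _1 * 16;
     op_3 = &array + _2;
   and walking the MULT/PLUS/conversion chain below back to the
   GOMP_SIMD_LANE call lets us record base == &array and
   linear_step == 16 in *ARGINFO.  */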
3755 static void
3756 vect_simd_lane_linear (tree op, struct loop *loop,
3757 struct simd_call_arg_info *arginfo)
3759 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3761 if (!is_gimple_assign (def_stmt)
3762 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3763 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3764 return;
3766 tree base = gimple_assign_rhs1 (def_stmt);
3767 HOST_WIDE_INT linear_step = 0;
3768 tree v = gimple_assign_rhs2 (def_stmt);
3769 while (TREE_CODE (v) == SSA_NAME)
3771 tree t;
3772 def_stmt = SSA_NAME_DEF_STMT (v);
3773 if (is_gimple_assign (def_stmt))
3774 switch (gimple_assign_rhs_code (def_stmt))
3776 case PLUS_EXPR:
3777 t = gimple_assign_rhs2 (def_stmt);
3778 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3779 return;
3780 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3781 v = gimple_assign_rhs1 (def_stmt);
3782 continue;
3783 case MULT_EXPR:
3784 t = gimple_assign_rhs2 (def_stmt);
3785 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3786 return;
3787 linear_step = tree_to_shwi (t);
3788 v = gimple_assign_rhs1 (def_stmt);
3789 continue;
3790 CASE_CONVERT:
3791 t = gimple_assign_rhs1 (def_stmt);
3792 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3793 || (TYPE_PRECISION (TREE_TYPE (v))
3794 < TYPE_PRECISION (TREE_TYPE (t))))
3795 return;
3796 if (!linear_step)
3797 linear_step = 1;
3798 v = t;
3799 continue;
3800 default:
3801 return;
3803 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3804 && loop->simduid
3805 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3806 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3807 == loop->simduid))
3809 if (!linear_step)
3810 linear_step = 1;
3811 arginfo->linear_step = linear_step;
3812 arginfo->op = base;
3813 arginfo->simd_lane_linear = true;
3814 return;
3819 /* Return the number of elements in vector type VECTYPE, which is associated
3820 with a SIMD clone. At present these vectors always have a constant
3821 length. */
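/* TYPE_VECTOR_SUBPARTS is a poly_uint64 in general; to_constant () below
   asserts that the length is a compile-time constant, which currently
   always holds for the vector types used by simd clones.  */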
3823 static unsigned HOST_WIDE_INT
3824 simd_clone_subparts (tree vectype)
3826 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3829 /* Function vectorizable_simd_clone_call.
3831 Check if STMT_INFO performs a function call that can be vectorized
3832 by calling a simd clone of the function.
3833 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3834 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3835 Return true if STMT_INFO is vectorizable in this way. */
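/* Rough sketch of the transform done here: for a function declared with
   something like "#pragma omp declare simd", the compiler creates simd
   clones that take and return whole vectors (node->simd_clones).  A scalar
   call
     y_1 = foo (x_2);
   in a vectorized loop is then replaced by vf / simdlen calls to the
   best-matching clone (chosen by the badness computation below), each
   passing vector arguments and producing a vector result.  */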
3837 static bool
3838 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3839 gimple_stmt_iterator *gsi,
3840 stmt_vec_info *vec_stmt, slp_tree slp_node,
3841 stmt_vector_for_cost *)
3843 tree vec_dest;
3844 tree scalar_dest;
3845 tree op, type;
3846 tree vec_oprnd0 = NULL_TREE;
3847 stmt_vec_info prev_stmt_info;
3848 tree vectype;
3849 unsigned int nunits;
3850 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3851 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3852 vec_info *vinfo = stmt_info->vinfo;
3853 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3854 tree fndecl, new_temp;
3855 int ncopies, j;
3856 auto_vec<simd_call_arg_info> arginfo;
3857 vec<tree> vargs = vNULL;
3858 size_t i, nargs;
3859 tree lhs, rtype, ratype;
3860 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3862 /* Is STMT a vectorizable call? */
3863 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3864 if (!stmt)
3865 return false;
3867 fndecl = gimple_call_fndecl (stmt);
3868 if (fndecl == NULL_TREE)
3869 return false;
3871 struct cgraph_node *node = cgraph_node::get (fndecl);
3872 if (node == NULL || node->simd_clones == NULL)
3873 return false;
3875 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3876 return false;
3878 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3879 && ! vec_stmt)
3880 return false;
3882 if (gimple_call_lhs (stmt)
3883 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3884 return false;
3886 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3888 vectype = STMT_VINFO_VECTYPE (stmt_info);
3890 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3891 return false;
3893 /* FORNOW */
3894 if (slp_node)
3895 return false;
3897 /* Process function arguments. */
3898 nargs = gimple_call_num_args (stmt);
3900 /* Bail out if the function has zero arguments. */
3901 if (nargs == 0)
3902 return false;
3904 arginfo.reserve (nargs, true);
3906 for (i = 0; i < nargs; i++)
3908 simd_call_arg_info thisarginfo;
3909 affine_iv iv;
3911 thisarginfo.linear_step = 0;
3912 thisarginfo.align = 0;
3913 thisarginfo.op = NULL_TREE;
3914 thisarginfo.simd_lane_linear = false;
3916 op = gimple_call_arg (stmt, i);
3917 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3918 &thisarginfo.vectype)
3919 || thisarginfo.dt == vect_uninitialized_def)
3921 if (dump_enabled_p ())
3922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3923 "use not simple.\n");
3924 return false;
3927 if (thisarginfo.dt == vect_constant_def
3928 || thisarginfo.dt == vect_external_def)
3929 gcc_assert (thisarginfo.vectype == NULL_TREE);
3930 else
3931 gcc_assert (thisarginfo.vectype != NULL_TREE);
3933 /* For linear arguments, the analyze phase should have saved
3934 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3935 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3936 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3938 gcc_assert (vec_stmt);
3939 thisarginfo.linear_step
3940 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3941 thisarginfo.op
3942 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3943 thisarginfo.simd_lane_linear
3944 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3945 == boolean_true_node);
3946 /* If the loop has been peeled for alignment, we need to adjust it. */
3947 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3948 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3949 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3951 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3952 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3953 tree opt = TREE_TYPE (thisarginfo.op);
3954 bias = fold_convert (TREE_TYPE (step), bias);
3955 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3956 thisarginfo.op
3957 = fold_build2 (POINTER_TYPE_P (opt)
3958 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3959 thisarginfo.op, bias);
3962 else if (!vec_stmt
3963 && thisarginfo.dt != vect_constant_def
3964 && thisarginfo.dt != vect_external_def
3965 && loop_vinfo
3966 && TREE_CODE (op) == SSA_NAME
3967 && simple_iv (loop, loop_containing_stmt (stmt), op,
3968 &iv, false)
3969 && tree_fits_shwi_p (iv.step))
3971 thisarginfo.linear_step = tree_to_shwi (iv.step);
3972 thisarginfo.op = iv.base;
3974 else if ((thisarginfo.dt == vect_constant_def
3975 || thisarginfo.dt == vect_external_def)
3976 && POINTER_TYPE_P (TREE_TYPE (op)))
3977 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3978 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3979 linear too. */
3980 if (POINTER_TYPE_P (TREE_TYPE (op))
3981 && !thisarginfo.linear_step
3982 && !vec_stmt
3983 && thisarginfo.dt != vect_constant_def
3984 && thisarginfo.dt != vect_external_def
3985 && loop_vinfo
3986 && !slp_node
3987 && TREE_CODE (op) == SSA_NAME)
3988 vect_simd_lane_linear (op, loop, &thisarginfo);
3990 arginfo.quick_push (thisarginfo);
3993 unsigned HOST_WIDE_INT vf;
3994 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3996 if (dump_enabled_p ())
3997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3998 "not considering SIMD clones; not yet supported"
3999 " for variable-width vectors.\n");
4000 return false;
4003 unsigned int badness = 0;
4004 struct cgraph_node *bestn = NULL;
4005 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4006 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4007 else
4008 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4009 n = n->simdclone->next_clone)
4011 unsigned int this_badness = 0;
4012 if (n->simdclone->simdlen > vf
4013 || n->simdclone->nargs != nargs)
4014 continue;
4015 if (n->simdclone->simdlen < vf)
4016 this_badness += (exact_log2 (vf)
4017 - exact_log2 (n->simdclone->simdlen)) * 1024;
4018 if (n->simdclone->inbranch)
4019 this_badness += 2048;
4020 int target_badness = targetm.simd_clone.usable (n);
4021 if (target_badness < 0)
4022 continue;
4023 this_badness += target_badness * 512;
4024 /* FORNOW: Have to add code to add the mask argument. */
4025 if (n->simdclone->inbranch)
4026 continue;
4027 for (i = 0; i < nargs; i++)
4029 switch (n->simdclone->args[i].arg_type)
4031 case SIMD_CLONE_ARG_TYPE_VECTOR:
4032 if (!useless_type_conversion_p
4033 (n->simdclone->args[i].orig_type,
4034 TREE_TYPE (gimple_call_arg (stmt, i))))
4035 i = -1;
4036 else if (arginfo[i].dt == vect_constant_def
4037 || arginfo[i].dt == vect_external_def
4038 || arginfo[i].linear_step)
4039 this_badness += 64;
4040 break;
4041 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4042 if (arginfo[i].dt != vect_constant_def
4043 && arginfo[i].dt != vect_external_def)
4044 i = -1;
4045 break;
4046 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4047 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4048 if (arginfo[i].dt == vect_constant_def
4049 || arginfo[i].dt == vect_external_def
4050 || (arginfo[i].linear_step
4051 != n->simdclone->args[i].linear_step))
4052 i = -1;
4053 break;
4054 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4055 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4056 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4057 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4058 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4059 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4060 /* FORNOW */
4061 i = -1;
4062 break;
4063 case SIMD_CLONE_ARG_TYPE_MASK:
4064 gcc_unreachable ();
4066 if (i == (size_t) -1)
4067 break;
4068 if (n->simdclone->args[i].alignment > arginfo[i].align)
4070 i = -1;
4071 break;
4073 if (arginfo[i].align)
4074 this_badness += (exact_log2 (arginfo[i].align)
4075 - exact_log2 (n->simdclone->args[i].alignment));
4077 if (i == (size_t) -1)
4078 continue;
4079 if (bestn == NULL || this_badness < badness)
4081 bestn = n;
4082 badness = this_badness;
4086 if (bestn == NULL)
4087 return false;
4089 for (i = 0; i < nargs; i++)
4090 if ((arginfo[i].dt == vect_constant_def
4091 || arginfo[i].dt == vect_external_def)
4092 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4094 arginfo[i].vectype
4095 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
4096 i)));
4097 if (arginfo[i].vectype == NULL
4098 || (simd_clone_subparts (arginfo[i].vectype)
4099 > bestn->simdclone->simdlen))
4100 return false;
4103 fndecl = bestn->decl;
4104 nunits = bestn->simdclone->simdlen;
4105 ncopies = vf / nunits;
4107 /* If the function isn't const, only allow it in simd loops where the
4108 user has asserted that at least nunits consecutive iterations can be
4109 performed using SIMD instructions. */
4110 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4111 && gimple_vuse (stmt))
4112 return false;
4114 /* Sanity check: make sure that at least one copy of the vectorized stmt
4115 needs to be generated. */
4116 gcc_assert (ncopies >= 1);
4118 if (!vec_stmt) /* transformation not required. */
4120 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4121 for (i = 0; i < nargs; i++)
4122 if ((bestn->simdclone->args[i].arg_type
4123 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4124 || (bestn->simdclone->args[i].arg_type
4125 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4127 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4128 + 1);
4129 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4130 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4131 ? size_type_node : TREE_TYPE (arginfo[i].op);
4132 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4133 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4134 tree sll = arginfo[i].simd_lane_linear
4135 ? boolean_true_node : boolean_false_node;
4136 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4138 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4139 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4140 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4141 return true;
4144 /* Transform. */
4146 if (dump_enabled_p ())
4147 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4149 /* Handle def. */
4150 scalar_dest = gimple_call_lhs (stmt);
4151 vec_dest = NULL_TREE;
4152 rtype = NULL_TREE;
4153 ratype = NULL_TREE;
4154 if (scalar_dest)
4156 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4157 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4158 if (TREE_CODE (rtype) == ARRAY_TYPE)
4160 ratype = rtype;
4161 rtype = TREE_TYPE (ratype);
4165 prev_stmt_info = NULL;
4166 for (j = 0; j < ncopies; ++j)
4168 /* Build argument list for the vectorized call. */
4169 if (j == 0)
4170 vargs.create (nargs);
4171 else
4172 vargs.truncate (0);
4174 for (i = 0; i < nargs; i++)
4176 unsigned int k, l, m, o;
4177 tree atype;
4178 op = gimple_call_arg (stmt, i);
4179 switch (bestn->simdclone->args[i].arg_type)
4181 case SIMD_CLONE_ARG_TYPE_VECTOR:
4182 atype = bestn->simdclone->args[i].vector_type;
4183 o = nunits / simd_clone_subparts (atype);
4184 for (m = j * o; m < (j + 1) * o; m++)
4186 if (simd_clone_subparts (atype)
4187 < simd_clone_subparts (arginfo[i].vectype))
4189 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4190 k = (simd_clone_subparts (arginfo[i].vectype)
4191 / simd_clone_subparts (atype));
4192 gcc_assert ((k & (k - 1)) == 0);
4193 if (m == 0)
4194 vec_oprnd0
4195 = vect_get_vec_def_for_operand (op, stmt_info);
4196 else
4198 vec_oprnd0 = arginfo[i].op;
4199 if ((m & (k - 1)) == 0)
4200 vec_oprnd0
4201 = vect_get_vec_def_for_stmt_copy (vinfo,
4202 vec_oprnd0);
4204 arginfo[i].op = vec_oprnd0;
4205 vec_oprnd0
4206 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4207 bitsize_int (prec),
4208 bitsize_int ((m & (k - 1)) * prec));
4209 gassign *new_stmt
4210 = gimple_build_assign (make_ssa_name (atype),
4211 vec_oprnd0);
4212 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4213 vargs.safe_push (gimple_assign_lhs (new_stmt));
4215 else
4217 k = (simd_clone_subparts (atype)
4218 / simd_clone_subparts (arginfo[i].vectype));
4219 gcc_assert ((k & (k - 1)) == 0);
4220 vec<constructor_elt, va_gc> *ctor_elts;
4221 if (k != 1)
4222 vec_alloc (ctor_elts, k);
4223 else
4224 ctor_elts = NULL;
4225 for (l = 0; l < k; l++)
4227 if (m == 0 && l == 0)
4228 vec_oprnd0
4229 = vect_get_vec_def_for_operand (op, stmt_info);
4230 else
4231 vec_oprnd0
4232 = vect_get_vec_def_for_stmt_copy (vinfo,
4233 arginfo[i].op);
4234 arginfo[i].op = vec_oprnd0;
4235 if (k == 1)
4236 break;
4237 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4238 vec_oprnd0);
4240 if (k == 1)
4241 vargs.safe_push (vec_oprnd0);
4242 else
4244 vec_oprnd0 = build_constructor (atype, ctor_elts);
4245 gassign *new_stmt
4246 = gimple_build_assign (make_ssa_name (atype),
4247 vec_oprnd0);
4248 vect_finish_stmt_generation (stmt_info, new_stmt,
4249 gsi);
4250 vargs.safe_push (gimple_assign_lhs (new_stmt));
4254 break;
4255 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4256 vargs.safe_push (op);
4257 break;
4258 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4259 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4260 if (j == 0)
4262 gimple_seq stmts;
4263 arginfo[i].op
4264 = force_gimple_operand (arginfo[i].op, &stmts, true,
4265 NULL_TREE);
4266 if (stmts != NULL)
4268 basic_block new_bb;
4269 edge pe = loop_preheader_edge (loop);
4270 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4271 gcc_assert (!new_bb);
4273 if (arginfo[i].simd_lane_linear)
4275 vargs.safe_push (arginfo[i].op);
4276 break;
4278 tree phi_res = copy_ssa_name (op);
4279 gphi *new_phi = create_phi_node (phi_res, loop->header);
4280 loop_vinfo->add_stmt (new_phi);
4281 add_phi_arg (new_phi, arginfo[i].op,
4282 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4283 enum tree_code code
4284 = POINTER_TYPE_P (TREE_TYPE (op))
4285 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4286 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4287 ? sizetype : TREE_TYPE (op);
4288 widest_int cst
4289 = wi::mul (bestn->simdclone->args[i].linear_step,
4290 ncopies * nunits);
4291 tree tcst = wide_int_to_tree (type, cst);
4292 tree phi_arg = copy_ssa_name (op);
4293 gassign *new_stmt
4294 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4295 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4296 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4297 loop_vinfo->add_stmt (new_stmt);
4298 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4299 UNKNOWN_LOCATION);
4300 arginfo[i].op = phi_res;
4301 vargs.safe_push (phi_res);
4303 else
4305 enum tree_code code
4306 = POINTER_TYPE_P (TREE_TYPE (op))
4307 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4308 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4309 ? sizetype : TREE_TYPE (op);
4310 widest_int cst
4311 = wi::mul (bestn->simdclone->args[i].linear_step,
4312 j * nunits);
4313 tree tcst = wide_int_to_tree (type, cst);
4314 new_temp = make_ssa_name (TREE_TYPE (op));
4315 gassign *new_stmt
4316 = gimple_build_assign (new_temp, code,
4317 arginfo[i].op, tcst);
4318 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4319 vargs.safe_push (new_temp);
4321 break;
4322 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4323 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4324 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4325 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4326 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4327 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4328 default:
4329 gcc_unreachable ();
4333 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4334 if (vec_dest)
4336 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4337 if (ratype)
4338 new_temp = create_tmp_var (ratype);
4339 else if (simd_clone_subparts (vectype)
4340 == simd_clone_subparts (rtype))
4341 new_temp = make_ssa_name (vec_dest, new_call);
4342 else
4343 new_temp = make_ssa_name (rtype, new_call);
4344 gimple_call_set_lhs (new_call, new_temp);
4346 stmt_vec_info new_stmt_info
4347 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4349 if (vec_dest)
4351 if (simd_clone_subparts (vectype) < nunits)
4353 unsigned int k, l;
4354 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4355 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4356 k = nunits / simd_clone_subparts (vectype);
4357 gcc_assert ((k & (k - 1)) == 0);
4358 for (l = 0; l < k; l++)
4360 tree t;
4361 if (ratype)
4363 t = build_fold_addr_expr (new_temp);
4364 t = build2 (MEM_REF, vectype, t,
4365 build_int_cst (TREE_TYPE (t), l * bytes));
4367 else
4368 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4369 bitsize_int (prec), bitsize_int (l * prec));
4370 gimple *new_stmt
4371 = gimple_build_assign (make_ssa_name (vectype), t);
4372 new_stmt_info
4373 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4375 if (j == 0 && l == 0)
4376 STMT_VINFO_VEC_STMT (stmt_info)
4377 = *vec_stmt = new_stmt_info;
4378 else
4379 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4381 prev_stmt_info = new_stmt_info;
4384 if (ratype)
4385 vect_clobber_variable (stmt_info, gsi, new_temp);
4386 continue;
4388 else if (simd_clone_subparts (vectype) > nunits)
4390 unsigned int k = (simd_clone_subparts (vectype)
4391 / simd_clone_subparts (rtype));
4392 gcc_assert ((k & (k - 1)) == 0);
4393 if ((j & (k - 1)) == 0)
4394 vec_alloc (ret_ctor_elts, k);
4395 if (ratype)
4397 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4398 for (m = 0; m < o; m++)
4400 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4401 size_int (m), NULL_TREE, NULL_TREE);
4402 gimple *new_stmt
4403 = gimple_build_assign (make_ssa_name (rtype), tem);
4404 new_stmt_info
4405 = vect_finish_stmt_generation (stmt_info, new_stmt,
4406 gsi);
4407 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4408 gimple_assign_lhs (new_stmt));
4410 vect_clobber_variable (stmt_info, gsi, new_temp);
4412 else
4413 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4414 if ((j & (k - 1)) != k - 1)
4415 continue;
4416 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4417 gimple *new_stmt
4418 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4419 new_stmt_info
4420 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4422 if ((unsigned) j == k - 1)
4423 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4424 else
4425 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4427 prev_stmt_info = new_stmt_info;
4428 continue;
4430 else if (ratype)
4432 tree t = build_fold_addr_expr (new_temp);
4433 t = build2 (MEM_REF, vectype, t,
4434 build_int_cst (TREE_TYPE (t), 0));
4435 gimple *new_stmt
4436 = gimple_build_assign (make_ssa_name (vec_dest), t);
4437 new_stmt_info
4438 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4439 vect_clobber_variable (stmt_info, gsi, new_temp);
4443 if (j == 0)
4444 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4445 else
4446 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4448 prev_stmt_info = new_stmt_info;
4451 vargs.release ();
4453 /* The call in STMT might prevent it from being removed in dce.
4454 We cannot, however, remove it here, due to the way the SSA name
4455 it defines is mapped to the new definition. So just replace the
4456 rhs of the statement with something harmless. */
4458 if (slp_node)
4459 return true;
4461 gimple *new_stmt;
4462 if (scalar_dest)
4464 type = TREE_TYPE (scalar_dest);
4465 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4466 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4468 else
4469 new_stmt = gimple_build_nop ();
4470 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4471 unlink_stmt_vdef (stmt);
4473 return true;
4477 /* Function vect_gen_widened_results_half
4479 Create a vector stmt whose code, number of arguments, and result
4480 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4481 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4482 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4483 needs to be created (DECL is a function-decl of a target-builtin).
4484 STMT_INFO is the original scalar stmt that we are vectorizing. */
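/* For example, when widening V8HI operands to V4SI results the two halves
   are typically VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR statements (or the
   VEC_WIDEN_MULT_{LO,HI}_EXPR pair for WIDEN_MULT_EXPR); this helper emits
   a single half and is therefore called twice per input vector by
   vect_create_vectorized_promotion_stmts below.  */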
4486 static gimple *
4487 vect_gen_widened_results_half (enum tree_code code,
4488 tree decl,
4489 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4490 tree vec_dest, gimple_stmt_iterator *gsi,
4491 stmt_vec_info stmt_info)
4493 gimple *new_stmt;
4494 tree new_temp;
4496 /* Generate half of the widened result: */
4497 if (code == CALL_EXPR)
4499 /* Target specific support */
4500 if (op_type == binary_op)
4501 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4502 else
4503 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4504 new_temp = make_ssa_name (vec_dest, new_stmt);
4505 gimple_call_set_lhs (new_stmt, new_temp);
4507 else
4509 /* Generic support */
4510 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4511 if (op_type != binary_op)
4512 vec_oprnd1 = NULL;
4513 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4514 new_temp = make_ssa_name (vec_dest, new_stmt);
4515 gimple_assign_set_lhs (new_stmt, new_temp);
4517 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4519 return new_stmt;
4523 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4524 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4525 containing the scalar operand), and for the rest we get a copy with
4526 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4527 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4528 The vectors are collected into VEC_OPRNDS. */
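/* E.g. with MULTI_STEP_CVT == 1 this pushes four vector defs into
   VEC_OPRNDS: the def of *OPRND plus three successive stmt copies, which
   is what a two-step narrowing chain below consumes.  */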
4530 static void
4531 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4532 vec<tree> *vec_oprnds, int multi_step_cvt)
4534 vec_info *vinfo = stmt_info->vinfo;
4535 tree vec_oprnd;
4537 /* Get the first vector operand. */
4538 /* All the vector operands except the very first one (which is the scalar
4539 operand) are stmt copies. */
4540 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4541 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4542 else
4543 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4545 vec_oprnds->quick_push (vec_oprnd);
4547 /* Get second vector operand. */
4548 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4549 vec_oprnds->quick_push (vec_oprnd);
4551 *oprnd = vec_oprnd;
4553 /* For conversion in multiple steps, continue to get operands
4554 recursively. */
4555 if (multi_step_cvt)
4556 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4557 multi_step_cvt - 1);
4561 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4562 For multi-step conversions store the resulting vectors and call the function
4563 recursively. */
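/* A demotion pairs two input vectors into one narrower output vector,
   e.g. two V4SI operands become one V8HI result via VEC_PACK_TRUNC_EXPR;
   that is why the loop below walks VEC_OPRNDS two elements at a time.  */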
4565 static void
4566 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4567 int multi_step_cvt,
4568 stmt_vec_info stmt_info,
4569 vec<tree> vec_dsts,
4570 gimple_stmt_iterator *gsi,
4571 slp_tree slp_node, enum tree_code code,
4572 stmt_vec_info *prev_stmt_info)
4574 unsigned int i;
4575 tree vop0, vop1, new_tmp, vec_dest;
4577 vec_dest = vec_dsts.pop ();
4579 for (i = 0; i < vec_oprnds->length (); i += 2)
4581 /* Create demotion operation. */
4582 vop0 = (*vec_oprnds)[i];
4583 vop1 = (*vec_oprnds)[i + 1];
4584 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4585 new_tmp = make_ssa_name (vec_dest, new_stmt);
4586 gimple_assign_set_lhs (new_stmt, new_tmp);
4587 stmt_vec_info new_stmt_info
4588 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4590 if (multi_step_cvt)
4591 /* Store the resulting vector for next recursive call. */
4592 (*vec_oprnds)[i/2] = new_tmp;
4593 else
4595 /* This is the last step of the conversion sequence. Store the
4596 vectors in SLP_NODE or in the vector info of the scalar statement
4597 (or in the STMT_VINFO_RELATED_STMT chain). */
4598 if (slp_node)
4599 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4600 else
4602 if (!*prev_stmt_info)
4603 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4604 else
4605 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4607 *prev_stmt_info = new_stmt_info;
4612 /* For multi-step demotion operations we first generate demotion operations
4613 from the source type to the intermediate types, and then combine the
4614 results (stored in VEC_OPRNDS) with a demotion operation to the
4615 destination type. */
4616 if (multi_step_cvt)
4618 /* At each level of recursion we have half of the operands we had at the
4619 previous level. */
4620 vec_oprnds->truncate ((i+1)/2);
4621 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4622 stmt_info, vec_dsts, gsi,
4623 slp_node, VEC_PACK_TRUNC_EXPR,
4624 prev_stmt_info);
4627 vec_dsts.quick_push (vec_dest);
4631 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4632 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4633 STMT_INFO. For multi-step conversions store the resulting vectors and
4634 call the function recursively. */
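/* Each input vector yields two output vectors here (the LO and HI halves
   built with CODE1 and CODE2), e.g. one V8HI operand unpacks into two V4SI
   results; the halves are stored back into VEC_OPRNDS0 in order so that a
   following promotion step, if any, can consume them.  */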
4636 static void
4637 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4638 vec<tree> *vec_oprnds1,
4639 stmt_vec_info stmt_info, tree vec_dest,
4640 gimple_stmt_iterator *gsi,
4641 enum tree_code code1,
4642 enum tree_code code2, tree decl1,
4643 tree decl2, int op_type)
4645 int i;
4646 tree vop0, vop1, new_tmp1, new_tmp2;
4647 gimple *new_stmt1, *new_stmt2;
4648 vec<tree> vec_tmp = vNULL;
4650 vec_tmp.create (vec_oprnds0->length () * 2);
4651 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4653 if (op_type == binary_op)
4654 vop1 = (*vec_oprnds1)[i];
4655 else
4656 vop1 = NULL_TREE;
4658 /* Generate the two halves of promotion operation. */
4659 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4660 op_type, vec_dest, gsi,
4661 stmt_info);
4662 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4663 op_type, vec_dest, gsi,
4664 stmt_info);
4665 if (is_gimple_call (new_stmt1))
4667 new_tmp1 = gimple_call_lhs (new_stmt1);
4668 new_tmp2 = gimple_call_lhs (new_stmt2);
4670 else
4672 new_tmp1 = gimple_assign_lhs (new_stmt1);
4673 new_tmp2 = gimple_assign_lhs (new_stmt2);
4676 /* Store the results for the next step. */
4677 vec_tmp.quick_push (new_tmp1);
4678 vec_tmp.quick_push (new_tmp2);
4681 vec_oprnds0->release ();
4682 *vec_oprnds0 = vec_tmp;
4686 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4687 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4688 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4689 Return true if STMT_INFO is vectorizable in this way. */
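/* The MODIFIER computed below classifies the conversion by the ratio of
   input to output lanes, roughly:
     NONE   - same number of lanes in and out, e.g. V4SI <-> V4SF;
     WIDEN  - fewer output lanes than input lanes, e.g. V8HI -> V4SI,
              so each input vector produces two (or more) output vectors;
     NARROW - more output lanes than input lanes, e.g. V4SI -> V8HI,
              so two (or more) input vectors are packed into one output.  */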
4691 static bool
4692 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4693 stmt_vec_info *vec_stmt, slp_tree slp_node,
4694 stmt_vector_for_cost *cost_vec)
4696 tree vec_dest;
4697 tree scalar_dest;
4698 tree op0, op1 = NULL_TREE;
4699 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4700 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4701 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4702 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4703 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4704 tree new_temp;
4705 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4706 int ndts = 2;
4707 stmt_vec_info prev_stmt_info;
4708 poly_uint64 nunits_in;
4709 poly_uint64 nunits_out;
4710 tree vectype_out, vectype_in;
4711 int ncopies, i, j;
4712 tree lhs_type, rhs_type;
4713 enum { NARROW, NONE, WIDEN } modifier;
4714 vec<tree> vec_oprnds0 = vNULL;
4715 vec<tree> vec_oprnds1 = vNULL;
4716 tree vop0;
4717 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4718 vec_info *vinfo = stmt_info->vinfo;
4719 int multi_step_cvt = 0;
4720 vec<tree> interm_types = vNULL;
4721 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4722 int op_type;
4723 unsigned short fltsz;
4725 /* Is STMT a vectorizable conversion? */
4727 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4728 return false;
4730 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4731 && ! vec_stmt)
4732 return false;
4734 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4735 if (!stmt)
4736 return false;
4738 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4739 return false;
4741 code = gimple_assign_rhs_code (stmt);
4742 if (!CONVERT_EXPR_CODE_P (code)
4743 && code != FIX_TRUNC_EXPR
4744 && code != FLOAT_EXPR
4745 && code != WIDEN_MULT_EXPR
4746 && code != WIDEN_LSHIFT_EXPR)
4747 return false;
4749 op_type = TREE_CODE_LENGTH (code);
4751 /* Check types of lhs and rhs. */
4752 scalar_dest = gimple_assign_lhs (stmt);
4753 lhs_type = TREE_TYPE (scalar_dest);
4754 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4756 op0 = gimple_assign_rhs1 (stmt);
4757 rhs_type = TREE_TYPE (op0);
4759 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4760 && !((INTEGRAL_TYPE_P (lhs_type)
4761 && INTEGRAL_TYPE_P (rhs_type))
4762 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4763 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4764 return false;
4766 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4767 && ((INTEGRAL_TYPE_P (lhs_type)
4768 && !type_has_mode_precision_p (lhs_type))
4769 || (INTEGRAL_TYPE_P (rhs_type)
4770 && !type_has_mode_precision_p (rhs_type))))
4772 if (dump_enabled_p ())
4773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4774 "type conversion to/from bit-precision unsupported."
4775 "\n");
4776 return false;
4779 /* Check the operands of the operation. */
4780 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4782 if (dump_enabled_p ())
4783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4784 "use not simple.\n");
4785 return false;
4787 if (op_type == binary_op)
4789 bool ok;
4791 op1 = gimple_assign_rhs2 (stmt);
4792 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4793 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4794 OP1. */
4795 if (CONSTANT_CLASS_P (op0))
4796 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4797 else
4798 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4800 if (!ok)
4802 if (dump_enabled_p ())
4803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4804 "use not simple.\n");
4805 return false;
4809 /* If op0 is an external or constant defs use a vector type of
4810 the same size as the output vector type. */
4811 if (!vectype_in)
4812 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4813 if (vec_stmt)
4814 gcc_assert (vectype_in);
4815 if (!vectype_in)
4817 if (dump_enabled_p ())
4818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4819 "no vectype for scalar type %T\n", rhs_type);
4821 return false;
4824 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4825 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4827 if (dump_enabled_p ())
4828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4829 "can't convert between boolean and non "
4830 "boolean vectors %T\n", rhs_type);
4832 return false;
4835 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4836 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4837 if (known_eq (nunits_out, nunits_in))
4838 modifier = NONE;
4839 else if (multiple_p (nunits_out, nunits_in))
4840 modifier = NARROW;
4841 else
4843 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4844 modifier = WIDEN;
4847 /* Multiple types in SLP are handled by creating the appropriate number of
4848 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4849 case of SLP. */
4850 if (slp_node)
4851 ncopies = 1;
4852 else if (modifier == NARROW)
4853 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4854 else
4855 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4857 /* Sanity check: make sure that at least one copy of the vectorized stmt
4858 needs to be generated. */
4859 gcc_assert (ncopies >= 1);
4861 bool found_mode = false;
4862 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4863 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4864 opt_scalar_mode rhs_mode_iter;
4866 /* Supportable by target? */
4867 switch (modifier)
4869 case NONE:
4870 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4871 return false;
4872 if (supportable_convert_operation (code, vectype_out, vectype_in,
4873 &decl1, &code1))
4874 break;
4875 /* FALLTHRU */
4876 unsupported:
4877 if (dump_enabled_p ())
4878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4879 "conversion not supported by target.\n");
4880 return false;
4882 case WIDEN:
4883 if (supportable_widening_operation (code, stmt_info, vectype_out,
4884 vectype_in, &code1, &code2,
4885 &multi_step_cvt, &interm_types))
4887 /* Binary widening operation can only be supported directly by the
4888 architecture. */
4889 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4890 break;
4893 if (code != FLOAT_EXPR
4894 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4895 goto unsupported;
4897 fltsz = GET_MODE_SIZE (lhs_mode);
4898 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4900 rhs_mode = rhs_mode_iter.require ();
4901 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4902 break;
4904 cvt_type
4905 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4906 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4907 if (cvt_type == NULL_TREE)
4908 goto unsupported;
4910 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4912 if (!supportable_convert_operation (code, vectype_out,
4913 cvt_type, &decl1, &codecvt1))
4914 goto unsupported;
4916 else if (!supportable_widening_operation (code, stmt_info,
4917 vectype_out, cvt_type,
4918 &codecvt1, &codecvt2,
4919 &multi_step_cvt,
4920 &interm_types))
4921 continue;
4922 else
4923 gcc_assert (multi_step_cvt == 0);
4925 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4926 vectype_in, &code1, &code2,
4927 &multi_step_cvt, &interm_types))
4929 found_mode = true;
4930 break;
4934 if (!found_mode)
4935 goto unsupported;
4937 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4938 codecvt2 = ERROR_MARK;
4939 else
4941 multi_step_cvt++;
4942 interm_types.safe_push (cvt_type);
4943 cvt_type = NULL_TREE;
4945 break;
4947 case NARROW:
4948 gcc_assert (op_type == unary_op);
4949 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4950 &code1, &multi_step_cvt,
4951 &interm_types))
4952 break;
4954 if (code != FIX_TRUNC_EXPR
4955 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4956 goto unsupported;
4958 cvt_type
4959 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4960 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4961 if (cvt_type == NULL_TREE)
4962 goto unsupported;
4963 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4964 &decl1, &codecvt1))
4965 goto unsupported;
4966 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4967 &code1, &multi_step_cvt,
4968 &interm_types))
4969 break;
4970 goto unsupported;
4972 default:
4973 gcc_unreachable ();
4976 if (!vec_stmt) /* transformation not required. */
4978 DUMP_VECT_SCOPE ("vectorizable_conversion");
4979 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4981 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4982 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4983 cost_vec);
4985 else if (modifier == NARROW)
4987 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4988 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4989 cost_vec);
4991 else
4993 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4994 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4995 cost_vec);
4997 interm_types.release ();
4998 return true;
5001 /* Transform. */
5002 if (dump_enabled_p ())
5003 dump_printf_loc (MSG_NOTE, vect_location,
5004 "transform conversion. ncopies = %d.\n", ncopies);
5006 if (op_type == binary_op)
5008 if (CONSTANT_CLASS_P (op0))
5009 op0 = fold_convert (TREE_TYPE (op1), op0);
5010 else if (CONSTANT_CLASS_P (op1))
5011 op1 = fold_convert (TREE_TYPE (op0), op1);
5014 /* In case of multi-step conversion, we first generate conversion operations
5015 to the intermediate types, and then from those types to the final one.
5016 We create vector destinations for the intermediate types (TYPES) received
5017 from supportable_*_operation, and store them in the correct order
5018 for future use in vect_create_vectorized_*_stmts (). */
5019 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5020 vec_dest = vect_create_destination_var (scalar_dest,
5021 (cvt_type && modifier == WIDEN)
5022 ? cvt_type : vectype_out);
5023 vec_dsts.quick_push (vec_dest);
5025 if (multi_step_cvt)
5027 for (i = interm_types.length () - 1;
5028 interm_types.iterate (i, &intermediate_type); i--)
5030 vec_dest = vect_create_destination_var (scalar_dest,
5031 intermediate_type);
5032 vec_dsts.quick_push (vec_dest);
5036 if (cvt_type)
5037 vec_dest = vect_create_destination_var (scalar_dest,
5038 modifier == WIDEN
5039 ? vectype_out : cvt_type);
5041 if (!slp_node)
5043 if (modifier == WIDEN)
5045 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5046 if (op_type == binary_op)
5047 vec_oprnds1.create (1);
5049 else if (modifier == NARROW)
5050 vec_oprnds0.create (
5051 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5053 else if (code == WIDEN_LSHIFT_EXPR)
5054 vec_oprnds1.create (slp_node->vec_stmts_size);
5056 last_oprnd = op0;
5057 prev_stmt_info = NULL;
5058 switch (modifier)
5060 case NONE:
5061 for (j = 0; j < ncopies; j++)
5063 if (j == 0)
5064 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5065 NULL, slp_node);
5066 else
5067 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5069 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5071 stmt_vec_info new_stmt_info;
5072 /* Arguments are ready. Create the new vector stmt. */
5073 if (code1 == CALL_EXPR)
5075 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5076 new_temp = make_ssa_name (vec_dest, new_stmt);
5077 gimple_call_set_lhs (new_stmt, new_temp);
5078 new_stmt_info
5079 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5081 else
5083 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5084 gassign *new_stmt
5085 = gimple_build_assign (vec_dest, code1, vop0);
5086 new_temp = make_ssa_name (vec_dest, new_stmt);
5087 gimple_assign_set_lhs (new_stmt, new_temp);
5088 new_stmt_info
5089 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5092 if (slp_node)
5093 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5094 else
5096 if (!prev_stmt_info)
5097 STMT_VINFO_VEC_STMT (stmt_info)
5098 = *vec_stmt = new_stmt_info;
5099 else
5100 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5101 prev_stmt_info = new_stmt_info;
5105 break;
5107 case WIDEN:
5108 /* In case the vectorization factor (VF) is bigger than the number
5109 of elements that we can fit in a vectype (nunits), we have to
5110 generate more than one vector stmt, i.e., we need to "unroll"
5111 the vector stmt by a factor of VF/nunits. */
5112 for (j = 0; j < ncopies; j++)
5114 /* Handle uses. */
5115 if (j == 0)
5117 if (slp_node)
5119 if (code == WIDEN_LSHIFT_EXPR)
5121 unsigned int k;
5123 vec_oprnd1 = op1;
5124 /* Store vec_oprnd1 for every vector stmt to be created
5125 for SLP_NODE. We check during the analysis that all
5126 the shift arguments are the same. */
5127 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5128 vec_oprnds1.quick_push (vec_oprnd1);
5130 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5131 &vec_oprnds0, NULL, slp_node);
5133 else
5134 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5135 &vec_oprnds1, slp_node);
5137 else
5139 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5140 vec_oprnds0.quick_push (vec_oprnd0);
5141 if (op_type == binary_op)
5143 if (code == WIDEN_LSHIFT_EXPR)
5144 vec_oprnd1 = op1;
5145 else
5146 vec_oprnd1
5147 = vect_get_vec_def_for_operand (op1, stmt_info);
5148 vec_oprnds1.quick_push (vec_oprnd1);
5152 else
5154 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5155 vec_oprnds0.truncate (0);
5156 vec_oprnds0.quick_push (vec_oprnd0);
5157 if (op_type == binary_op)
5159 if (code == WIDEN_LSHIFT_EXPR)
5160 vec_oprnd1 = op1;
5161 else
5162 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5163 vec_oprnd1);
5164 vec_oprnds1.truncate (0);
5165 vec_oprnds1.quick_push (vec_oprnd1);
5169 /* Arguments are ready. Create the new vector stmts. */
5170 for (i = multi_step_cvt; i >= 0; i--)
5172 tree this_dest = vec_dsts[i];
5173 enum tree_code c1 = code1, c2 = code2;
5174 if (i == 0 && codecvt2 != ERROR_MARK)
5176 c1 = codecvt1;
5177 c2 = codecvt2;
5179 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5180 &vec_oprnds1, stmt_info,
5181 this_dest, gsi,
5182 c1, c2, decl1, decl2,
5183 op_type);
5186 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5188 stmt_vec_info new_stmt_info;
5189 if (cvt_type)
5191 if (codecvt1 == CALL_EXPR)
5193 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5194 new_temp = make_ssa_name (vec_dest, new_stmt);
5195 gimple_call_set_lhs (new_stmt, new_temp);
5196 new_stmt_info
5197 = vect_finish_stmt_generation (stmt_info, new_stmt,
5198 gsi);
5200 else
5202 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5203 new_temp = make_ssa_name (vec_dest);
5204 gassign *new_stmt
5205 = gimple_build_assign (new_temp, codecvt1, vop0);
5206 new_stmt_info
5207 = vect_finish_stmt_generation (stmt_info, new_stmt,
5208 gsi);
5211 else
5212 new_stmt_info = vinfo->lookup_def (vop0);
5214 if (slp_node)
5215 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5216 else
5218 if (!prev_stmt_info)
5219 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5220 else
5221 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5222 prev_stmt_info = new_stmt_info;
5227 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5228 break;
5230 case NARROW:
5231 /* In case the vectorization factor (VF) is bigger than the number
5232 of elements that we can fit in a vectype (nunits), we have to
5233 generate more than one vector stmt, i.e., we need to "unroll"
5234 the vector stmt by a factor of VF/nunits. */
5235 for (j = 0; j < ncopies; j++)
5237 /* Handle uses. */
5238 if (slp_node)
5239 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5240 slp_node);
5241 else
5243 vec_oprnds0.truncate (0);
5244 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5245 vect_pow2 (multi_step_cvt) - 1);
5248 /* Arguments are ready. Create the new vector stmts. */
5249 if (cvt_type)
5250 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5252 if (codecvt1 == CALL_EXPR)
5254 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5255 new_temp = make_ssa_name (vec_dest, new_stmt);
5256 gimple_call_set_lhs (new_stmt, new_temp);
5257 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5259 else
5261 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5262 new_temp = make_ssa_name (vec_dest);
5263 gassign *new_stmt
5264 = gimple_build_assign (new_temp, codecvt1, vop0);
5265 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5268 vec_oprnds0[i] = new_temp;
5271 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5272 stmt_info, vec_dsts, gsi,
5273 slp_node, code1,
5274 &prev_stmt_info);
5277 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5278 break;
5281 vec_oprnds0.release ();
5282 vec_oprnds1.release ();
5283 interm_types.release ();
5285 return true;
5289 /* Function vectorizable_assignment.
5291 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5292 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5293 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5294 Return true if STMT_INFO is vectorizable in this way. */
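/* This covers plain copies, PAREN_EXPR and conversions that change neither
   the number of lanes nor the vector size; such a "conversion" is emitted
   as a VIEW_CONVERT_EXPR of the operand vector, e.g. a signedness change
     _1 = (unsigned int) x_2;
   becomes roughly
     vect__1 = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect_x_2);  */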
5296 static bool
5297 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5298 stmt_vec_info *vec_stmt, slp_tree slp_node,
5299 stmt_vector_for_cost *cost_vec)
5301 tree vec_dest;
5302 tree scalar_dest;
5303 tree op;
5304 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5305 tree new_temp;
5306 enum vect_def_type dt[1] = {vect_unknown_def_type};
5307 int ndts = 1;
5308 int ncopies;
5309 int i, j;
5310 vec<tree> vec_oprnds = vNULL;
5311 tree vop;
5312 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5313 vec_info *vinfo = stmt_info->vinfo;
5314 stmt_vec_info prev_stmt_info = NULL;
5315 enum tree_code code;
5316 tree vectype_in;
5318 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5319 return false;
5321 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5322 && ! vec_stmt)
5323 return false;
5325 /* Is vectorizable assignment? */
5326 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5327 if (!stmt)
5328 return false;
5330 scalar_dest = gimple_assign_lhs (stmt);
5331 if (TREE_CODE (scalar_dest) != SSA_NAME)
5332 return false;
5334 code = gimple_assign_rhs_code (stmt);
5335 if (gimple_assign_single_p (stmt)
5336 || code == PAREN_EXPR
5337 || CONVERT_EXPR_CODE_P (code))
5338 op = gimple_assign_rhs1 (stmt);
5339 else
5340 return false;
5342 if (code == VIEW_CONVERT_EXPR)
5343 op = TREE_OPERAND (op, 0);
5345 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5346 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5348 /* Multiple types in SLP are handled by creating the appropriate number of
5349 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5350 case of SLP. */
5351 if (slp_node)
5352 ncopies = 1;
5353 else
5354 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5356 gcc_assert (ncopies >= 1);
5358 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5360 if (dump_enabled_p ())
5361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5362 "use not simple.\n");
5363 return false;
5366 /* We can handle NOP_EXPR conversions that do not change the number
5367 of elements or the vector size. */
5368 if ((CONVERT_EXPR_CODE_P (code)
5369 || code == VIEW_CONVERT_EXPR)
5370 && (!vectype_in
5371 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5372 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5373 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5374 return false;
5376 /* We do not handle bit-precision changes. */
5377 if ((CONVERT_EXPR_CODE_P (code)
5378 || code == VIEW_CONVERT_EXPR)
5379 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5380 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5381 || !type_has_mode_precision_p (TREE_TYPE (op)))
5382 /* But a conversion that does not change the bit-pattern is ok. */
5383 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5384 > TYPE_PRECISION (TREE_TYPE (op)))
5385 && TYPE_UNSIGNED (TREE_TYPE (op)))
5386 /* Conversion between boolean types of different sizes is
 5387 a simple assignment in case their vectypes are the same
5388 boolean vectors. */
5389 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5390 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5392 if (dump_enabled_p ())
5393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5394 "type conversion to/from bit-precision "
5395 "unsupported.\n");
5396 return false;
5399 if (!vec_stmt) /* transformation not required. */
5401 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5402 DUMP_VECT_SCOPE ("vectorizable_assignment");
5403 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5404 return true;
5407 /* Transform. */
5408 if (dump_enabled_p ())
5409 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5411 /* Handle def. */
5412 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5414 /* Handle use. */
5415 for (j = 0; j < ncopies; j++)
5417 /* Handle uses. */
5418 if (j == 0)
5419 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5420 else
5421 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
 5423 /* Arguments are ready. Create the new vector stmt. */
5424 stmt_vec_info new_stmt_info = NULL;
5425 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5427 if (CONVERT_EXPR_CODE_P (code)
5428 || code == VIEW_CONVERT_EXPR)
5429 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5430 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5431 new_temp = make_ssa_name (vec_dest, new_stmt);
5432 gimple_assign_set_lhs (new_stmt, new_temp);
5433 new_stmt_info
5434 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5435 if (slp_node)
5436 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5439 if (slp_node)
5440 continue;
5442 if (j == 0)
5443 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5444 else
5445 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5447 prev_stmt_info = new_stmt_info;
5450 vec_oprnds.release ();
5451 return true;
5455 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5456 either as shift by a scalar or by a vector. */
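/* A minimal usage sketch (hypothetical caller; OPRND0 is an assumed
   variable, not from the sources):

	if (vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
	  ... rewrite the statement using the shift ...
	else
	  ... keep the original form ...

   Pattern recognizers use this style of query before committing to a
   shift-based replacement; the exact call sites are not shown here.  */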
5458 bool
5459 vect_supportable_shift (enum tree_code code, tree scalar_type)
5462 machine_mode vec_mode;
5463 optab optab;
5464 int icode;
5465 tree vectype;
5467 vectype = get_vectype_for_scalar_type (scalar_type);
5468 if (!vectype)
5469 return false;
5471 optab = optab_for_tree_code (code, vectype, optab_scalar);
5472 if (!optab
5473 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5475 optab = optab_for_tree_code (code, vectype, optab_vector);
5476 if (!optab
5477 || (optab_handler (optab, TYPE_MODE (vectype))
5478 == CODE_FOR_nothing))
5479 return false;
5482 vec_mode = TYPE_MODE (vectype);
5483 icode = (int) optab_handler (optab, vec_mode);
5484 if (icode == CODE_FOR_nothing)
5485 return false;
5487 return true;
5491 /* Function vectorizable_shift.
5493 Check if STMT_INFO performs a shift operation that can be vectorized.
5494 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5495 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5496 Return true if STMT_INFO is vectorizable in this way. */
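/* Two illustrative loops (example names assumed; target support for the
   corresponding optabs is also assumed):

	for (i = 0; i < n; i++)
	  a[i] = a[i] << s;	    shift amount is loop invariant
				    -> vector/scalar shift (optab_scalar)

	for (i = 0; i < n; i++)
	  a[i] = a[i] << b[i];	    shift amount varies per element
				    -> vector/vector shift (optab_vector)

   The analysis below picks between the two optab flavours accordingly.  */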
5498 bool
5499 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5500 stmt_vec_info *vec_stmt, slp_tree slp_node,
5501 stmt_vector_for_cost *cost_vec)
5503 tree vec_dest;
5504 tree scalar_dest;
5505 tree op0, op1 = NULL;
5506 tree vec_oprnd1 = NULL_TREE;
5507 tree vectype;
5508 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5509 enum tree_code code;
5510 machine_mode vec_mode;
5511 tree new_temp;
5512 optab optab;
5513 int icode;
5514 machine_mode optab_op2_mode;
5515 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5516 int ndts = 2;
5517 stmt_vec_info prev_stmt_info;
5518 poly_uint64 nunits_in;
5519 poly_uint64 nunits_out;
5520 tree vectype_out;
5521 tree op1_vectype;
5522 int ncopies;
5523 int j, i;
5524 vec<tree> vec_oprnds0 = vNULL;
5525 vec<tree> vec_oprnds1 = vNULL;
5526 tree vop0, vop1;
5527 unsigned int k;
5528 bool scalar_shift_arg = true;
5529 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5530 vec_info *vinfo = stmt_info->vinfo;
5532 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5533 return false;
5535 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5536 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5537 && ! vec_stmt)
5538 return false;
5540 /* Is STMT a vectorizable binary/unary operation? */
5541 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5542 if (!stmt)
5543 return false;
5545 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5546 return false;
5548 code = gimple_assign_rhs_code (stmt);
5550 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5551 || code == RROTATE_EXPR))
5552 return false;
5554 scalar_dest = gimple_assign_lhs (stmt);
5555 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5556 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5558 if (dump_enabled_p ())
5559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5560 "bit-precision shifts not supported.\n");
5561 return false;
5564 op0 = gimple_assign_rhs1 (stmt);
5565 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5567 if (dump_enabled_p ())
5568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5569 "use not simple.\n");
5570 return false;
5572 /* If op0 is an external or constant def use a vector type with
5573 the same size as the output vector type. */
5574 if (!vectype)
5575 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5576 if (vec_stmt)
5577 gcc_assert (vectype);
5578 if (!vectype)
5580 if (dump_enabled_p ())
5581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5582 "no vectype for scalar type\n");
5583 return false;
5586 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5587 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5588 if (maybe_ne (nunits_out, nunits_in))
5589 return false;
5591 op1 = gimple_assign_rhs2 (stmt);
5592 stmt_vec_info op1_def_stmt_info;
5593 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5594 &op1_def_stmt_info))
5596 if (dump_enabled_p ())
5597 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5598 "use not simple.\n");
5599 return false;
5602 /* Multiple types in SLP are handled by creating the appropriate number of
5603 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5604 case of SLP. */
5605 if (slp_node)
5606 ncopies = 1;
5607 else
5608 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5610 gcc_assert (ncopies >= 1);
 5612 /* Determine whether the shift amount is a vector or a scalar. If the
5613 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5615 if ((dt[1] == vect_internal_def
5616 || dt[1] == vect_induction_def
5617 || dt[1] == vect_nested_cycle)
5618 && !slp_node)
5619 scalar_shift_arg = false;
5620 else if (dt[1] == vect_constant_def
5621 || dt[1] == vect_external_def
5622 || dt[1] == vect_internal_def)
 5624 /* In SLP we need to check whether the shift count is the same for
 5625 all the scalar stmts; in loops, if it is a constant or loop
 5626 invariant, it is always a scalar shift. */
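/* Illustrative SLP example (names assumed): a group built from

	a[0] = a[0] >> 2;  a[1] = a[1] >> 2;

   has a uniform shift count and can keep a scalar shift argument, while

	a[0] = a[0] >> 2;  a[1] = a[1] >> 3;

   does not, and forces a vector shift argument.  */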
5627 if (slp_node)
5629 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5630 stmt_vec_info slpstmt_info;
5632 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5634 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5635 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5636 scalar_shift_arg = false;
5639 /* For internal SLP defs we have to make sure we see scalar stmts
5640 for all vector elements.
5641 ??? For different vectors we could resort to a different
5642 scalar shift operand but code-generation below simply always
5643 takes the first. */
5644 if (dt[1] == vect_internal_def
5645 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5646 stmts.length ()))
5647 scalar_shift_arg = false;
5650 /* If the shift amount is computed by a pattern stmt we cannot
 5651 use the scalar amount directly, so give up and use a vector
5652 shift. */
5653 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5654 scalar_shift_arg = false;
5656 else
5658 if (dump_enabled_p ())
5659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5660 "operand mode requires invariant argument.\n");
5661 return false;
5664 /* Vector shifted by vector. */
5665 if (!scalar_shift_arg)
5667 optab = optab_for_tree_code (code, vectype, optab_vector);
5668 if (dump_enabled_p ())
5669 dump_printf_loc (MSG_NOTE, vect_location,
5670 "vector/vector shift/rotate found.\n");
5672 if (!op1_vectype)
5673 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5674 if (op1_vectype == NULL_TREE
5675 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5677 if (dump_enabled_p ())
5678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5679 "unusable type for last operand in"
5680 " vector/vector shift/rotate.\n");
5681 return false;
 5684 /* See if the machine has a vector shifted by scalar insn, and if not
5685 then see if it has a vector shifted by vector insn. */
5686 else
5688 optab = optab_for_tree_code (code, vectype, optab_scalar);
5689 if (optab
5690 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5692 if (dump_enabled_p ())
5693 dump_printf_loc (MSG_NOTE, vect_location,
5694 "vector/scalar shift/rotate found.\n");
5696 else
5698 optab = optab_for_tree_code (code, vectype, optab_vector);
5699 if (optab
5700 && (optab_handler (optab, TYPE_MODE (vectype))
5701 != CODE_FOR_nothing))
5703 scalar_shift_arg = false;
5705 if (dump_enabled_p ())
5706 dump_printf_loc (MSG_NOTE, vect_location,
5707 "vector/vector shift/rotate found.\n");
 5709 /* Unlike the other binary operators, shifts/rotates have an int
 5710 rhs instead of one of the same type as the lhs, so make sure
 5711 the scalar shift amount has the right type if we are dealing
 5712 with vectors of long long/long/short/char. */
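/* For example (illustrative), given

	long long x;  int s;  ...  x = x << s;

   the scalar amount S has type int while the vector elements are
   long long, so S is converted to long long here before being used.  */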
5713 if (dt[1] == vect_constant_def)
5714 op1 = fold_convert (TREE_TYPE (vectype), op1);
5715 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5716 TREE_TYPE (op1)))
5718 if (slp_node
5719 && TYPE_MODE (TREE_TYPE (vectype))
5720 != TYPE_MODE (TREE_TYPE (op1)))
5722 if (dump_enabled_p ())
5723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5724 "unusable type for last operand in"
5725 " vector/vector shift/rotate.\n");
5726 return false;
5728 if (vec_stmt && !slp_node)
5730 op1 = fold_convert (TREE_TYPE (vectype), op1);
5731 op1 = vect_init_vector (stmt_info, op1,
5732 TREE_TYPE (vectype), NULL);
5739 /* Supportable by target? */
5740 if (!optab)
5742 if (dump_enabled_p ())
5743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5744 "no optab.\n");
5745 return false;
5747 vec_mode = TYPE_MODE (vectype);
5748 icode = (int) optab_handler (optab, vec_mode);
5749 if (icode == CODE_FOR_nothing)
5751 if (dump_enabled_p ())
5752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5753 "op not supported by target.\n");
5754 /* Check only during analysis. */
5755 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5756 || (!vec_stmt
5757 && !vect_worthwhile_without_simd_p (vinfo, code)))
5758 return false;
5759 if (dump_enabled_p ())
5760 dump_printf_loc (MSG_NOTE, vect_location,
5761 "proceeding using word mode.\n");
5764 /* Worthwhile without SIMD support? Check only during analysis. */
5765 if (!vec_stmt
5766 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5767 && !vect_worthwhile_without_simd_p (vinfo, code))
5769 if (dump_enabled_p ())
5770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5771 "not worthwhile without SIMD support.\n");
5772 return false;
5775 if (!vec_stmt) /* transformation not required. */
5777 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5778 DUMP_VECT_SCOPE ("vectorizable_shift");
5779 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5780 return true;
5783 /* Transform. */
5785 if (dump_enabled_p ())
5786 dump_printf_loc (MSG_NOTE, vect_location,
5787 "transform binary/unary operation.\n");
5789 /* Handle def. */
5790 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5792 prev_stmt_info = NULL;
5793 for (j = 0; j < ncopies; j++)
5795 /* Handle uses. */
5796 if (j == 0)
5798 if (scalar_shift_arg)
5800 /* Vector shl and shr insn patterns can be defined with scalar
5801 operand 2 (shift operand). In this case, use constant or loop
5802 invariant op1 directly, without extending it to vector mode
5803 first. */
5804 optab_op2_mode = insn_data[icode].operand[2].mode;
5805 if (!VECTOR_MODE_P (optab_op2_mode))
5807 if (dump_enabled_p ())
5808 dump_printf_loc (MSG_NOTE, vect_location,
5809 "operand 1 using scalar mode.\n");
5810 vec_oprnd1 = op1;
5811 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5812 vec_oprnds1.quick_push (vec_oprnd1);
5813 if (slp_node)
5815 /* Store vec_oprnd1 for every vector stmt to be created
5816 for SLP_NODE. We check during the analysis that all
5817 the shift arguments are the same.
5818 TODO: Allow different constants for different vector
5819 stmts generated for an SLP instance. */
5820 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5821 vec_oprnds1.quick_push (vec_oprnd1);
 5826 /* vec_oprnd1 is available if operand 1 should be of a scalar type
 5827 (a special case for certain kinds of vector shifts); otherwise,
5828 operand 1 should be of a vector type (the usual case). */
5829 if (vec_oprnd1)
5830 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5831 slp_node);
5832 else
5833 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5834 slp_node);
5836 else
5837 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5839 /* Arguments are ready. Create the new vector stmt. */
5840 stmt_vec_info new_stmt_info = NULL;
5841 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5843 vop1 = vec_oprnds1[i];
5844 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5845 new_temp = make_ssa_name (vec_dest, new_stmt);
5846 gimple_assign_set_lhs (new_stmt, new_temp);
5847 new_stmt_info
5848 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5849 if (slp_node)
5850 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5853 if (slp_node)
5854 continue;
5856 if (j == 0)
5857 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5858 else
5859 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5860 prev_stmt_info = new_stmt_info;
5863 vec_oprnds0.release ();
5864 vec_oprnds1.release ();
5866 return true;
5870 /* Function vectorizable_operation.
5872 Check if STMT_INFO performs a binary, unary or ternary operation that can
5873 be vectorized.
5874 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5875 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5876 Return true if STMT_INFO is vectorizable in this way. */
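/* Illustrative candidates (example names assumed; the target must support
   the operation on the chosen vector mode):

	for (i = 0; i < n; i++)
	  c[i] = a[i] + b[i];	      binary operation
	for (i = 0; i < n; i++)
	  b[i] = -a[i];		      unary operation
	for (i = 0; i < n; i++)
	  c[i] = a[i] | b[i];	      bitwise operation, also handled here

   Shifts and rotates are rejected below and left to vectorizable_shift.  */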
5878 static bool
5879 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5880 stmt_vec_info *vec_stmt, slp_tree slp_node,
5881 stmt_vector_for_cost *cost_vec)
5883 tree vec_dest;
5884 tree scalar_dest;
5885 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5886 tree vectype;
5887 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5888 enum tree_code code, orig_code;
5889 machine_mode vec_mode;
5890 tree new_temp;
5891 int op_type;
5892 optab optab;
5893 bool target_support_p;
5894 enum vect_def_type dt[3]
5895 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5896 int ndts = 3;
5897 stmt_vec_info prev_stmt_info;
5898 poly_uint64 nunits_in;
5899 poly_uint64 nunits_out;
5900 tree vectype_out;
5901 int ncopies;
5902 int j, i;
5903 vec<tree> vec_oprnds0 = vNULL;
5904 vec<tree> vec_oprnds1 = vNULL;
5905 vec<tree> vec_oprnds2 = vNULL;
5906 tree vop0, vop1, vop2;
5907 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5908 vec_info *vinfo = stmt_info->vinfo;
5910 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5911 return false;
5913 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5914 && ! vec_stmt)
5915 return false;
5917 /* Is STMT a vectorizable binary/unary operation? */
5918 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5919 if (!stmt)
5920 return false;
5922 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5923 return false;
5925 orig_code = code = gimple_assign_rhs_code (stmt);
5927 /* For pointer addition and subtraction, we should use the normal
5928 plus and minus for the vector operation. */
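/* For example (illustrative), pointer arithmetic such as q = p + i or
   d = p - q appears in GIMPLE as POINTER_PLUS_EXPR and POINTER_DIFF_EXPR
   respectively; the generated vector statements use the ordinary
   PLUS_EXPR/MINUS_EXPR on the element representation.  */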
5929 if (code == POINTER_PLUS_EXPR)
5930 code = PLUS_EXPR;
5931 if (code == POINTER_DIFF_EXPR)
5932 code = MINUS_EXPR;
 5934 /* Support only unary, binary and ternary operations. */
5935 op_type = TREE_CODE_LENGTH (code);
5936 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5938 if (dump_enabled_p ())
5939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5940 "num. args = %d (not unary/binary/ternary op).\n",
5941 op_type);
5942 return false;
5945 scalar_dest = gimple_assign_lhs (stmt);
5946 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5948 /* Most operations cannot handle bit-precision types without extra
5949 truncations. */
5950 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5951 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
 5952 /* Exceptions are bitwise binary operations. */
5953 && code != BIT_IOR_EXPR
5954 && code != BIT_XOR_EXPR
5955 && code != BIT_AND_EXPR)
5957 if (dump_enabled_p ())
5958 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5959 "bit-precision arithmetic not supported.\n");
5960 return false;
5963 op0 = gimple_assign_rhs1 (stmt);
5964 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5966 if (dump_enabled_p ())
5967 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5968 "use not simple.\n");
5969 return false;
5971 /* If op0 is an external or constant def use a vector type with
5972 the same size as the output vector type. */
5973 if (!vectype)
 5975 /* For a boolean type we cannot determine the vectype from an
 5976 invariant value (we don't know whether it is a vector
 5977 of booleans or a vector of integers). We use the output
 5978 vectype because operations on booleans don't change the
 5979 type. */
5980 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5982 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5984 if (dump_enabled_p ())
5985 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5986 "not supported operation on bool value.\n");
5987 return false;
5989 vectype = vectype_out;
5991 else
5992 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5994 if (vec_stmt)
5995 gcc_assert (vectype);
5996 if (!vectype)
5998 if (dump_enabled_p ())
5999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6000 "no vectype for scalar type %T\n",
6001 TREE_TYPE (op0));
6003 return false;
6006 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6007 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6008 if (maybe_ne (nunits_out, nunits_in))
6009 return false;
6011 if (op_type == binary_op || op_type == ternary_op)
6013 op1 = gimple_assign_rhs2 (stmt);
6014 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
6016 if (dump_enabled_p ())
6017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6018 "use not simple.\n");
6019 return false;
6022 if (op_type == ternary_op)
6024 op2 = gimple_assign_rhs3 (stmt);
6025 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
6027 if (dump_enabled_p ())
6028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6029 "use not simple.\n");
6030 return false;
6034 /* Multiple types in SLP are handled by creating the appropriate number of
6035 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6036 case of SLP. */
6037 if (slp_node)
6038 ncopies = 1;
6039 else
6040 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6042 gcc_assert (ncopies >= 1);
6044 /* Shifts are handled in vectorizable_shift (). */
6045 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6046 || code == RROTATE_EXPR)
6047 return false;
6049 /* Supportable by target? */
6051 vec_mode = TYPE_MODE (vectype);
6052 if (code == MULT_HIGHPART_EXPR)
6053 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6054 else
6056 optab = optab_for_tree_code (code, vectype, optab_default);
6057 if (!optab)
6059 if (dump_enabled_p ())
6060 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6061 "no optab.\n");
6062 return false;
6064 target_support_p = (optab_handler (optab, vec_mode)
6065 != CODE_FOR_nothing);
6068 if (!target_support_p)
6070 if (dump_enabled_p ())
6071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6072 "op not supported by target.\n");
6073 /* Check only during analysis. */
6074 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6075 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6076 return false;
6077 if (dump_enabled_p ())
6078 dump_printf_loc (MSG_NOTE, vect_location,
6079 "proceeding using word mode.\n");
6082 /* Worthwhile without SIMD support? Check only during analysis. */
6083 if (!VECTOR_MODE_P (vec_mode)
6084 && !vec_stmt
6085 && !vect_worthwhile_without_simd_p (vinfo, code))
6087 if (dump_enabled_p ())
6088 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6089 "not worthwhile without SIMD support.\n");
6090 return false;
6093 if (!vec_stmt) /* transformation not required. */
6095 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6096 DUMP_VECT_SCOPE ("vectorizable_operation");
6097 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6098 return true;
6101 /* Transform. */
6103 if (dump_enabled_p ())
6104 dump_printf_loc (MSG_NOTE, vect_location,
6105 "transform binary/unary operation.\n");
6107 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6108 vectors with unsigned elements, but the result is signed. So, we
 6109 need to compute the MINUS_EXPR into a vectype temporary and
6110 VIEW_CONVERT_EXPR it into the final vectype_out result. */
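/* A sketch of the statements generated for one copy (SSA names are
   illustrative only):

	vect_tmp  = vect_p1 - vect_p2;				  unsigned vectype
	vect_diff = VIEW_CONVERT_EXPR <vectype_out> (vect_tmp);  signed result  */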
6111 tree vec_cvt_dest = NULL_TREE;
6112 if (orig_code == POINTER_DIFF_EXPR)
6114 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6115 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6117 /* Handle def. */
6118 else
6119 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6121 /* In case the vectorization factor (VF) is bigger than the number
6122 of elements that we can fit in a vectype (nunits), we have to generate
 6123 more than one vector stmt, i.e., we need to "unroll" the
6124 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6125 from one copy of the vector stmt to the next, in the field
6126 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6127 stages to find the correct vector defs to be used when vectorizing
6128 stmts that use the defs of the current stmt. The example below
6129 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6130 we need to create 4 vectorized stmts):
6132 before vectorization:
6133 RELATED_STMT VEC_STMT
6134 S1: x = memref - -
6135 S2: z = x + 1 - -
6137 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6138 there):
6139 RELATED_STMT VEC_STMT
6140 VS1_0: vx0 = memref0 VS1_1 -
6141 VS1_1: vx1 = memref1 VS1_2 -
6142 VS1_2: vx2 = memref2 VS1_3 -
6143 VS1_3: vx3 = memref3 - -
6144 S1: x = load - VS1_0
6145 S2: z = x + 1 - -
6147 step2: vectorize stmt S2 (done here):
6148 To vectorize stmt S2 we first need to find the relevant vector
6149 def for the first operand 'x'. This is, as usual, obtained from
6150 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6151 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6152 relevant vector def 'vx0'. Having found 'vx0' we can generate
6153 the vector stmt VS2_0, and as usual, record it in the
6154 STMT_VINFO_VEC_STMT of stmt S2.
6155 When creating the second copy (VS2_1), we obtain the relevant vector
6156 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6157 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6158 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6159 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6160 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6161 chain of stmts and pointers:
6162 RELATED_STMT VEC_STMT
6163 VS1_0: vx0 = memref0 VS1_1 -
6164 VS1_1: vx1 = memref1 VS1_2 -
6165 VS1_2: vx2 = memref2 VS1_3 -
6166 VS1_3: vx3 = memref3 - -
6167 S1: x = load - VS1_0
6168 VS2_0: vz0 = vx0 + v1 VS2_1 -
6169 VS2_1: vz1 = vx1 + v1 VS2_2 -
6170 VS2_2: vz2 = vx2 + v1 VS2_3 -
6171 VS2_3: vz3 = vx3 + v1 - -
6172 S2: z = x + 1 - VS2_0 */
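/* The scalar source giving rise to S1/S2 above can be as simple as
   (illustrative):

	for (i = 0; i < N; i++)
	  z[i] = x[i] + 1;

   for example when another data type in the loop forces VF=16 while int
   vectors hold four lanes, so the addition is replicated into the four
   VS2_* copies shown.  */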
6174 prev_stmt_info = NULL;
6175 for (j = 0; j < ncopies; j++)
6177 /* Handle uses. */
6178 if (j == 0)
6180 if (op_type == binary_op)
6181 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6182 slp_node);
6183 else if (op_type == ternary_op)
6185 if (slp_node)
6187 auto_vec<tree> ops(3);
6188 ops.quick_push (op0);
6189 ops.quick_push (op1);
6190 ops.quick_push (op2);
6191 auto_vec<vec<tree> > vec_defs(3);
6192 vect_get_slp_defs (ops, slp_node, &vec_defs);
6193 vec_oprnds0 = vec_defs[0];
6194 vec_oprnds1 = vec_defs[1];
6195 vec_oprnds2 = vec_defs[2];
6197 else
6199 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6200 &vec_oprnds1, NULL);
6201 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6202 NULL, NULL);
6205 else
6206 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6207 slp_node);
6209 else
6211 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6212 if (op_type == ternary_op)
6214 tree vec_oprnd = vec_oprnds2.pop ();
6215 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6216 vec_oprnd));
6220 /* Arguments are ready. Create the new vector stmt. */
6221 stmt_vec_info new_stmt_info = NULL;
6222 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6224 vop1 = ((op_type == binary_op || op_type == ternary_op)
6225 ? vec_oprnds1[i] : NULL_TREE);
6226 vop2 = ((op_type == ternary_op)
6227 ? vec_oprnds2[i] : NULL_TREE);
6228 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6229 vop0, vop1, vop2);
6230 new_temp = make_ssa_name (vec_dest, new_stmt);
6231 gimple_assign_set_lhs (new_stmt, new_temp);
6232 new_stmt_info
6233 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6234 if (vec_cvt_dest)
6236 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6237 gassign *new_stmt
6238 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6239 new_temp);
6240 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6241 gimple_assign_set_lhs (new_stmt, new_temp);
6242 new_stmt_info
6243 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6245 if (slp_node)
6246 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6249 if (slp_node)
6250 continue;
6252 if (j == 0)
6253 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6254 else
6255 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6256 prev_stmt_info = new_stmt_info;
6259 vec_oprnds0.release ();
6260 vec_oprnds1.release ();
6261 vec_oprnds2.release ();
6263 return true;
6266 /* A helper function to ensure data reference DR_INFO's base alignment. */
6268 static void
6269 ensure_base_align (dr_vec_info *dr_info)
6271 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6272 return;
6274 if (dr_info->base_misaligned)
6276 tree base_decl = dr_info->base_decl;
6278 // We should only be able to increase the alignment of a base object if
6279 // we know what its new alignment should be at compile time.
6280 unsigned HOST_WIDE_INT align_base_to =
6281 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6283 if (decl_in_symtab_p (base_decl))
6284 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6285 else
6287 SET_DECL_ALIGN (base_decl, align_base_to);
6288 DECL_USER_ALIGN (base_decl) = 1;
6290 dr_info->base_misaligned = false;
6295 /* Function get_group_alias_ptr_type.
6297 Return the alias type for the group starting at FIRST_STMT_INFO. */
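/* Illustrative example (types assumed): for a grouped store of the two
   members of

	struct { int i; float f; } *p;
	p[k].i = ...;  p[k].f = ...;

   the alias sets of the int and float references differ, so the group is
   accessed through ptr_type_node; a group of homogeneous accesses keeps the
   more precise alias pointer type of its first reference.  */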
6299 static tree
6300 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6302 struct data_reference *first_dr, *next_dr;
6304 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6305 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6306 while (next_stmt_info)
6308 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6309 if (get_alias_set (DR_REF (first_dr))
6310 != get_alias_set (DR_REF (next_dr)))
6312 if (dump_enabled_p ())
6313 dump_printf_loc (MSG_NOTE, vect_location,
6314 "conflicting alias set types.\n");
6315 return ptr_type_node;
6317 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6319 return reference_alias_ptr_type (DR_REF (first_dr));
6323 /* Function vectorizable_store.
 6325 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
6326 that can be vectorized.
6327 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6328 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6329 Return true if STMT_INFO is vectorizable in this way. */
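/* Illustrative stores handled here (example names assumed; each form also
   requires the corresponding target support):

	for (i = 0; i < n; i++)
	  a[i] = b[i];			contiguous store
	for (i = 0; i < n; i++)
	  if (c[i]) a[i] = b[i];	masked store, after if-conversion
	for (i = 0; i < n; i++)
	  a[idx[i]] = b[i];		scatter store
	for (i = 0; i < n; i++)
	  a[3 * i] = b[i];		strided (elementwise) store

   Which strategy applies is determined by get_load_store_type below.  */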
6331 static bool
6332 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6333 stmt_vec_info *vec_stmt, slp_tree slp_node,
6334 stmt_vector_for_cost *cost_vec)
6336 tree data_ref;
6337 tree op;
6338 tree vec_oprnd = NULL_TREE;
6339 tree elem_type;
6340 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6341 struct loop *loop = NULL;
6342 machine_mode vec_mode;
6343 tree dummy;
6344 enum dr_alignment_support alignment_support_scheme;
6345 enum vect_def_type rhs_dt = vect_unknown_def_type;
6346 enum vect_def_type mask_dt = vect_unknown_def_type;
6347 stmt_vec_info prev_stmt_info = NULL;
6348 tree dataref_ptr = NULL_TREE;
6349 tree dataref_offset = NULL_TREE;
6350 gimple *ptr_incr = NULL;
6351 int ncopies;
6352 int j;
6353 stmt_vec_info first_stmt_info;
6354 bool grouped_store;
6355 unsigned int group_size, i;
6356 vec<tree> oprnds = vNULL;
6357 vec<tree> result_chain = vNULL;
6358 tree offset = NULL_TREE;
6359 vec<tree> vec_oprnds = vNULL;
6360 bool slp = (slp_node != NULL);
6361 unsigned int vec_num;
6362 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6363 vec_info *vinfo = stmt_info->vinfo;
6364 tree aggr_type;
6365 gather_scatter_info gs_info;
6366 poly_uint64 vf;
6367 vec_load_store_type vls_type;
6368 tree ref_type;
6370 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6371 return false;
6373 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6374 && ! vec_stmt)
6375 return false;
6377 /* Is vectorizable store? */
6379 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6380 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6382 tree scalar_dest = gimple_assign_lhs (assign);
6383 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6384 && is_pattern_stmt_p (stmt_info))
6385 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6386 if (TREE_CODE (scalar_dest) != ARRAY_REF
6387 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6388 && TREE_CODE (scalar_dest) != INDIRECT_REF
6389 && TREE_CODE (scalar_dest) != COMPONENT_REF
6390 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6391 && TREE_CODE (scalar_dest) != REALPART_EXPR
6392 && TREE_CODE (scalar_dest) != MEM_REF)
6393 return false;
6395 else
6397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
6398 if (!call || !gimple_call_internal_p (call))
6399 return false;
6401 internal_fn ifn = gimple_call_internal_fn (call);
6402 if (!internal_store_fn_p (ifn))
6403 return false;
6405 if (slp_node != NULL)
6407 if (dump_enabled_p ())
6408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6409 "SLP of masked stores not supported.\n");
6410 return false;
6413 int mask_index = internal_fn_mask_index (ifn);
6414 if (mask_index >= 0)
6416 mask = gimple_call_arg (call, mask_index);
6417 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
6418 &mask_vectype))
6419 return false;
6423 op = vect_get_store_rhs (stmt_info);
6425 /* Cannot have hybrid store SLP -- that would mean storing to the
6426 same location twice. */
6427 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6429 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6430 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6432 if (loop_vinfo)
6434 loop = LOOP_VINFO_LOOP (loop_vinfo);
6435 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6437 else
6438 vf = 1;
6440 /* Multiple types in SLP are handled by creating the appropriate number of
6441 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6442 case of SLP. */
6443 if (slp)
6444 ncopies = 1;
6445 else
6446 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6448 gcc_assert (ncopies >= 1);
6450 /* FORNOW. This restriction should be relaxed. */
6451 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
6453 if (dump_enabled_p ())
6454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6455 "multiple types in nested loop.\n");
6456 return false;
6459 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
6460 return false;
6462 elem_type = TREE_TYPE (vectype);
6463 vec_mode = TYPE_MODE (vectype);
6465 if (!STMT_VINFO_DATA_REF (stmt_info))
6466 return false;
6468 vect_memory_access_type memory_access_type;
6469 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
6470 &memory_access_type, &gs_info))
6471 return false;
6473 if (mask)
6475 if (memory_access_type == VMAT_CONTIGUOUS)
6477 if (!VECTOR_MODE_P (vec_mode)
6478 || !can_vec_mask_load_store_p (vec_mode,
6479 TYPE_MODE (mask_vectype), false))
6480 return false;
6482 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6483 && (memory_access_type != VMAT_GATHER_SCATTER
6484 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
6486 if (dump_enabled_p ())
6487 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6488 "unsupported access type for masked store.\n");
6489 return false;
6492 else
 6494 /* FORNOW. In some cases we can vectorize even if the data type is not
 6495 supported (e.g. array initialization with 0). */
6496 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6497 return false;
6500 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
6501 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6502 && memory_access_type != VMAT_GATHER_SCATTER
6503 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6504 if (grouped_store)
6506 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6507 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6508 group_size = DR_GROUP_SIZE (first_stmt_info);
6510 else
6512 first_stmt_info = stmt_info;
6513 first_dr_info = dr_info;
6514 group_size = vec_num = 1;
6517 if (!vec_stmt) /* transformation not required. */
6519 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6521 if (loop_vinfo
6522 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6523 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6524 memory_access_type, &gs_info);
6526 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6527 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6528 vls_type, slp_node, cost_vec);
6529 return true;
6531 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6533 /* Transform. */
6535 ensure_base_align (dr_info);
6537 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6539 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6540 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6541 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6542 tree ptr, var, scale, vec_mask;
6543 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
6544 tree mask_halfvectype = mask_vectype;
6545 edge pe = loop_preheader_edge (loop);
6546 gimple_seq seq;
6547 basic_block new_bb;
6548 enum { NARROW, NONE, WIDEN } modifier;
6549 poly_uint64 scatter_off_nunits
6550 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6552 if (known_eq (nunits, scatter_off_nunits))
6553 modifier = NONE;
6554 else if (known_eq (nunits * 2, scatter_off_nunits))
6556 modifier = WIDEN;
6558 /* Currently gathers and scatters are only supported for
6559 fixed-length vectors. */
6560 unsigned int count = scatter_off_nunits.to_constant ();
6561 vec_perm_builder sel (count, count, 1);
6562 for (i = 0; i < (unsigned int) count; ++i)
6563 sel.quick_push (i | (count / 2));
6565 vec_perm_indices indices (sel, 1, count);
6566 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6567 indices);
6568 gcc_assert (perm_mask != NULL_TREE);
6570 else if (known_eq (nunits, scatter_off_nunits * 2))
6572 modifier = NARROW;
6574 /* Currently gathers and scatters are only supported for
6575 fixed-length vectors. */
6576 unsigned int count = nunits.to_constant ();
6577 vec_perm_builder sel (count, count, 1);
6578 for (i = 0; i < (unsigned int) count; ++i)
6579 sel.quick_push (i | (count / 2));
6581 vec_perm_indices indices (sel, 2, count);
6582 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6583 gcc_assert (perm_mask != NULL_TREE);
6584 ncopies *= 2;
6586 if (mask)
6587 mask_halfvectype
6588 = build_same_sized_truth_vector_type (gs_info.offset_vectype);
6590 else
6591 gcc_unreachable ();
6593 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6594 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6595 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6596 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6597 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6598 scaletype = TREE_VALUE (arglist);
6600 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6601 && TREE_CODE (rettype) == VOID_TYPE);
6603 ptr = fold_convert (ptrtype, gs_info.base);
6604 if (!is_gimple_min_invariant (ptr))
6606 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6607 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6608 gcc_assert (!new_bb);
6611 if (mask == NULL_TREE)
6613 mask_arg = build_int_cst (masktype, -1);
6614 mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
6617 scale = build_int_cst (scaletype, gs_info.scale);
6619 prev_stmt_info = NULL;
6620 for (j = 0; j < ncopies; ++j)
6622 if (j == 0)
6624 src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
6625 op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
6626 stmt_info);
6627 if (mask)
6628 mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
6629 stmt_info);
6631 else if (modifier != NONE && (j & 1))
6633 if (modifier == WIDEN)
 6636 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
6637 vec_oprnd1);
6638 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6639 stmt_info, gsi);
6640 if (mask)
6641 mask_op
6642 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
6643 vec_mask);
6645 else if (modifier == NARROW)
6647 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6648 stmt_info, gsi);
6649 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
6650 vec_oprnd0);
6652 else
6653 gcc_unreachable ();
6655 else
6657 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
6658 vec_oprnd1);
6659 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
6660 vec_oprnd0);
6661 if (mask)
6662 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
6663 vec_mask);
6666 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6668 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6669 TYPE_VECTOR_SUBPARTS (srctype)));
6670 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6671 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6672 gassign *new_stmt
6673 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6674 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6675 src = var;
6678 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6680 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6681 TYPE_VECTOR_SUBPARTS (idxtype)));
6682 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6683 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6684 gassign *new_stmt
6685 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6686 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6687 op = var;
6690 if (mask)
6692 tree utype;
6693 mask_arg = mask_op;
6694 if (modifier == NARROW)
6696 var = vect_get_new_ssa_name (mask_halfvectype,
6697 vect_simple_var);
6698 gassign *new_stmt
6699 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
6700 : VEC_UNPACK_LO_EXPR,
6701 mask_op);
6702 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6703 mask_arg = var;
6705 tree optype = TREE_TYPE (mask_arg);
6706 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
6707 utype = masktype;
6708 else
6709 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
6710 var = vect_get_new_ssa_name (utype, vect_scalar_var);
6711 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
6712 gassign *new_stmt
6713 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
6714 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6715 mask_arg = var;
6716 if (!useless_type_conversion_p (masktype, utype))
6718 gcc_assert (TYPE_PRECISION (utype)
6719 <= TYPE_PRECISION (masktype));
6720 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
6721 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
6722 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6723 mask_arg = var;
6727 gcall *new_stmt
6728 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
6729 stmt_vec_info new_stmt_info
6730 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6732 if (prev_stmt_info == NULL)
6733 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6734 else
6735 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6736 prev_stmt_info = new_stmt_info;
6738 return true;
6741 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6742 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
6744 if (grouped_store)
6746 /* FORNOW */
6747 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
6749 /* We vectorize all the stmts of the interleaving group when we
6750 reach the last stmt in the group. */
6751 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6752 < DR_GROUP_SIZE (first_stmt_info)
6753 && !slp)
6755 *vec_stmt = NULL;
6756 return true;
6759 if (slp)
6761 grouped_store = false;
6762 /* VEC_NUM is the number of vect stmts to be created for this
6763 group. */
6764 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6765 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6766 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6767 == first_stmt_info);
6768 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6769 op = vect_get_store_rhs (first_stmt_info);
6771 else
6772 /* VEC_NUM is the number of vect stmts to be created for this
6773 group. */
6774 vec_num = group_size;
6776 ref_type = get_group_alias_ptr_type (first_stmt_info);
6778 else
6779 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
6781 if (dump_enabled_p ())
6782 dump_printf_loc (MSG_NOTE, vect_location,
6783 "transform store. ncopies = %d\n", ncopies);
6785 if (memory_access_type == VMAT_ELEMENTWISE
6786 || memory_access_type == VMAT_STRIDED_SLP)
6788 gimple_stmt_iterator incr_gsi;
6789 bool insert_after;
6790 gimple *incr;
6791 tree offvar;
6792 tree ivstep;
6793 tree running_off;
6794 tree stride_base, stride_step, alias_off;
6795 tree vec_oprnd;
6796 unsigned int g;
6797 /* Checked by get_load_store_type. */
6798 unsigned int const_nunits = nunits.to_constant ();
6800 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6801 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
6803 stride_base
6804 = fold_build_pointer_plus
6805 (DR_BASE_ADDRESS (first_dr_info->dr),
6806 size_binop (PLUS_EXPR,
6807 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
6808 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
6809 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
6811 /* For a store with loop-invariant (but other than power-of-2)
6812 stride (i.e. not a grouped access) like so:
6814 for (i = 0; i < n; i += stride)
6815 array[i] = ...;
6817 we generate a new induction variable and new stores from
6818 the components of the (vectorized) rhs:
6820 for (j = 0; ; j += VF*stride)
6821 vectemp = ...;
6822 tmp1 = vectemp[0];
6823 array[j] = tmp1;
6824 tmp2 = vectemp[1];
6825 array[j + stride] = tmp2;
6829 unsigned nstores = const_nunits;
6830 unsigned lnel = 1;
6831 tree ltype = elem_type;
6832 tree lvectype = vectype;
6833 if (slp)
6835 if (group_size < const_nunits
6836 && const_nunits % group_size == 0)
6838 nstores = const_nunits / group_size;
6839 lnel = group_size;
6840 ltype = build_vector_type (elem_type, group_size);
6841 lvectype = vectype;
 6843 /* First check whether the vec_extract optab does not support
 6844 extraction of the vector elts directly. */
6845 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6846 machine_mode vmode;
6847 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6848 || !VECTOR_MODE_P (vmode)
6849 || !targetm.vector_mode_supported_p (vmode)
6850 || (convert_optab_handler (vec_extract_optab,
6851 TYPE_MODE (vectype), vmode)
6852 == CODE_FOR_nothing))
6854 /* Try to avoid emitting an extract of vector elements
6855 by performing the extracts using an integer type of the
6856 same size, extracting from a vector of those and then
6857 re-interpreting it as the original vector type if
6858 supported. */
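/* Example (illustrative): with 4 x 32-bit vectors and group_size 2, the
   two-element pieces can be stored as two 64-bit extracts by viewing the
   V4SI vector as V2DI, instead of four separate SImode element extracts.  */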
6859 unsigned lsize
6860 = group_size * GET_MODE_BITSIZE (elmode);
6861 unsigned int lnunits = const_nunits / group_size;
6862 /* If we can't construct such a vector fall back to
6863 element extracts from the original vector type and
6864 element size stores. */
6865 if (int_mode_for_size (lsize, 0).exists (&elmode)
6866 && mode_for_vector (elmode, lnunits).exists (&vmode)
6867 && VECTOR_MODE_P (vmode)
6868 && targetm.vector_mode_supported_p (vmode)
6869 && (convert_optab_handler (vec_extract_optab,
6870 vmode, elmode)
6871 != CODE_FOR_nothing))
6873 nstores = lnunits;
6874 lnel = group_size;
6875 ltype = build_nonstandard_integer_type (lsize, 1);
6876 lvectype = build_vector_type (ltype, nstores);
6878 /* Else fall back to vector extraction anyway.
6879 Fewer stores are more important than avoiding spilling
6880 of the vector we extract from. Compared to the
6881 construction case in vectorizable_load no store-forwarding
6882 issue exists here for reasonable archs. */
6885 else if (group_size >= const_nunits
6886 && group_size % const_nunits == 0)
6888 nstores = 1;
6889 lnel = const_nunits;
6890 ltype = vectype;
6891 lvectype = vectype;
6893 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6894 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6897 ivstep = stride_step;
6898 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6899 build_int_cst (TREE_TYPE (ivstep), vf));
6901 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6903 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6904 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6905 create_iv (stride_base, ivstep, NULL,
6906 loop, &incr_gsi, insert_after,
6907 &offvar, NULL);
6908 incr = gsi_stmt (incr_gsi);
6909 loop_vinfo->add_stmt (incr);
6911 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6913 prev_stmt_info = NULL;
6914 alias_off = build_int_cst (ref_type, 0);
6915 stmt_vec_info next_stmt_info = first_stmt_info;
6916 for (g = 0; g < group_size; g++)
6918 running_off = offvar;
6919 if (g)
6921 tree size = TYPE_SIZE_UNIT (ltype);
6922 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6923 size);
6924 tree newoff = copy_ssa_name (running_off, NULL);
6925 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6926 running_off, pos);
6927 vect_finish_stmt_generation (stmt_info, incr, gsi);
6928 running_off = newoff;
6930 unsigned int group_el = 0;
6931 unsigned HOST_WIDE_INT
6932 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6933 for (j = 0; j < ncopies; j++)
6935 /* We've set op and dt above, from vect_get_store_rhs,
6936 and first_stmt_info == stmt_info. */
6937 if (j == 0)
6939 if (slp)
6941 vect_get_vec_defs (op, NULL_TREE, stmt_info,
6942 &vec_oprnds, NULL, slp_node);
6943 vec_oprnd = vec_oprnds[0];
6945 else
6947 op = vect_get_store_rhs (next_stmt_info);
6948 vec_oprnd = vect_get_vec_def_for_operand
6949 (op, next_stmt_info);
6952 else
6954 if (slp)
6955 vec_oprnd = vec_oprnds[j];
6956 else
6957 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
6958 vec_oprnd);
6960 /* Pun the vector to extract from if necessary. */
6961 if (lvectype != vectype)
6963 tree tem = make_ssa_name (lvectype);
6964 gimple *pun
6965 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6966 lvectype, vec_oprnd));
6967 vect_finish_stmt_generation (stmt_info, pun, gsi);
6968 vec_oprnd = tem;
6970 for (i = 0; i < nstores; i++)
6972 tree newref, newoff;
6973 gimple *incr, *assign;
6974 tree size = TYPE_SIZE (ltype);
6975 /* Extract the i'th component. */
6976 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6977 bitsize_int (i), size);
6978 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6979 size, pos);
6981 elem = force_gimple_operand_gsi (gsi, elem, true,
6982 NULL_TREE, true,
6983 GSI_SAME_STMT);
6985 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6986 group_el * elsz);
6987 newref = build2 (MEM_REF, ltype,
6988 running_off, this_off);
6989 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
6991 /* And store it to *running_off. */
6992 assign = gimple_build_assign (newref, elem);
6993 stmt_vec_info assign_info
6994 = vect_finish_stmt_generation (stmt_info, assign, gsi);
6996 group_el += lnel;
6997 if (! slp
6998 || group_el == group_size)
7000 newoff = copy_ssa_name (running_off, NULL);
7001 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7002 running_off, stride_step);
7003 vect_finish_stmt_generation (stmt_info, incr, gsi);
7005 running_off = newoff;
7006 group_el = 0;
7008 if (g == group_size - 1
7009 && !slp)
7011 if (j == 0 && i == 0)
7012 STMT_VINFO_VEC_STMT (stmt_info)
7013 = *vec_stmt = assign_info;
7014 else
7015 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
7016 prev_stmt_info = assign_info;
7020 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7021 if (slp)
7022 break;
7025 vec_oprnds.release ();
7026 return true;
7029 auto_vec<tree> dr_chain (group_size);
7030 oprnds.create (group_size);
7032 alignment_support_scheme
7033 = vect_supportable_dr_alignment (first_dr_info, false);
7034 gcc_assert (alignment_support_scheme);
7035 vec_loop_masks *loop_masks
7036 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7037 ? &LOOP_VINFO_MASKS (loop_vinfo)
7038 : NULL);
7039 /* Targets with store-lane instructions must not require explicit
7040 realignment. vect_supportable_dr_alignment always returns either
7041 dr_aligned or dr_unaligned_supported for masked operations. */
7042 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7043 && !mask
7044 && !loop_masks)
7045 || alignment_support_scheme == dr_aligned
7046 || alignment_support_scheme == dr_unaligned_supported);
7048 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7049 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7050 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7052 tree bump;
7053 tree vec_offset = NULL_TREE;
7054 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7056 aggr_type = NULL_TREE;
7057 bump = NULL_TREE;
7059 else if (memory_access_type == VMAT_GATHER_SCATTER)
7061 aggr_type = elem_type;
7062 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7063 &bump, &vec_offset);
7065 else
7067 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7068 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7069 else
7070 aggr_type = vectype;
7071 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
7072 memory_access_type);
7075 if (mask)
7076 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7078 /* In case the vectorization factor (VF) is bigger than the number
7079 of elements that we can fit in a vectype (nunits), we have to generate
 7080 more than one vector stmt, i.e., we need to "unroll" the
7081 vector stmt by a factor VF/nunits. For more details see documentation in
 7082 vect_get_vec_def_for_stmt_copy. */
7084 /* In case of interleaving (non-unit grouped access):
7086 S1: &base + 2 = x2
7087 S2: &base = x0
7088 S3: &base + 1 = x1
7089 S4: &base + 3 = x3
7091 We create vectorized stores starting from base address (the access of the
7092 first stmt in the chain (S2 in the above example), when the last store stmt
7093 of the chain (S4) is reached:
7095 VS1: &base = vx2
7096 VS2: &base + vec_size*1 = vx0
7097 VS3: &base + vec_size*2 = vx1
7098 VS4: &base + vec_size*3 = vx3
7100 Then permutation statements are generated:
7102 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7103 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7106 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7107 (the order of the data-refs in the output of vect_permute_store_chain
7108 corresponds to the order of scalar stmts in the interleaving chain - see
7109 the documentation of vect_permute_store_chain()).
7111 In case of both multiple types and interleaving, above vector stores and
7112 permutation stmts are created for every copy. The result vector stmts are
7113 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7114 STMT_VINFO_RELATED_STMT for the next copies.
7117 prev_stmt_info = NULL;
7118 tree vec_mask = NULL_TREE;
7119 for (j = 0; j < ncopies; j++)
7121 stmt_vec_info new_stmt_info;
7122 if (j == 0)
7124 if (slp)
7126 /* Get vectorized arguments for SLP_NODE. */
7127 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
7128 NULL, slp_node);
7130 vec_oprnd = vec_oprnds[0];
7132 else
7134 /* For interleaved stores we collect vectorized defs for all the
7135 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7136 used as an input to vect_permute_store_chain(), and OPRNDS as
7137 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
7139 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7140 OPRNDS are of size 1. */
7141 stmt_vec_info next_stmt_info = first_stmt_info;
7142 for (i = 0; i < group_size; i++)
7144 /* Since gaps are not supported for interleaved stores,
7145 DR_GROUP_SIZE is the exact number of stmts in the chain.
 7146 Therefore, NEXT_STMT_INFO cannot be NULL. In case
7147 that there is no interleaving, DR_GROUP_SIZE is 1,
7148 and only one iteration of the loop will be executed. */
7149 op = vect_get_store_rhs (next_stmt_info);
7150 vec_oprnd = vect_get_vec_def_for_operand
7151 (op, next_stmt_info);
7152 dr_chain.quick_push (vec_oprnd);
7153 oprnds.quick_push (vec_oprnd);
7154 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7156 if (mask)
7157 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
7158 mask_vectype);
 7161 /* We should have caught mismatched types earlier. */
7162 gcc_assert (useless_type_conversion_p (vectype,
7163 TREE_TYPE (vec_oprnd)));
7164 bool simd_lane_access_p
7165 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7166 if (simd_lane_access_p
7167 && !loop_masks
7168 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7169 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7170 && integer_zerop (DR_OFFSET (first_dr_info->dr))
7171 && integer_zerop (DR_INIT (first_dr_info->dr))
7172 && alias_sets_conflict_p (get_alias_set (aggr_type),
7173 get_alias_set (TREE_TYPE (ref_type))))
7175 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7176 dataref_offset = build_int_cst (ref_type, 0);
7178 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7179 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
7180 &dataref_ptr, &vec_offset);
7181 else
7182 dataref_ptr
7183 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
7184 simd_lane_access_p ? loop : NULL,
7185 offset, &dummy, gsi, &ptr_incr,
7186 simd_lane_access_p, NULL_TREE, bump);
7188 else
7190 /* For interleaved stores we created vectorized defs for all the
7191 defs stored in OPRNDS in the previous iteration (previous copy).
7192 DR_CHAIN is then used as an input to vect_permute_store_chain(),
7193 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7194 next copy.
7195 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7196 OPRNDS are of size 1. */
7197 for (i = 0; i < group_size; i++)
7199 op = oprnds[i];
7200 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
7201 dr_chain[i] = vec_oprnd;
7202 oprnds[i] = vec_oprnd;
7204 if (mask)
7205 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
7206 if (dataref_offset)
7207 dataref_offset
7208 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7209 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7210 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
7211 else
7212 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7213 stmt_info, bump);
7216 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7218 tree vec_array;
7220 /* Get an array into which we can store the individual vectors. */
7221 vec_array = create_vector_array (vectype, vec_num);
7223 /* Invalidate the current contents of VEC_ARRAY. This should
7224 become an RTL clobber too, which prevents the vector registers
7225 from being upward-exposed. */
7226 vect_clobber_variable (stmt_info, gsi, vec_array);
7228 /* Store the individual vectors into the array. */
7229 for (i = 0; i < vec_num; i++)
7231 vec_oprnd = dr_chain[i];
7232 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
7235 tree final_mask = NULL;
7236 if (loop_masks)
7237 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7238 vectype, j);
7239 if (vec_mask)
7240 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7241 vec_mask, gsi);
7243 gcall *call;
7244 if (final_mask)
7246 /* Emit:
7247 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7248 VEC_ARRAY). */
7249 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7250 tree alias_ptr = build_int_cst (ref_type, align);
7251 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7252 dataref_ptr, alias_ptr,
7253 final_mask, vec_array);
7255 else
7257 /* Emit:
7258 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7259 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7260 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7261 vec_array);
7262 gimple_call_set_lhs (call, data_ref);
7264 gimple_call_set_nothrow (call, true);
7265 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
7267 /* Record that VEC_ARRAY is now dead. */
7268 vect_clobber_variable (stmt_info, gsi, vec_array);
7270 else
7272 new_stmt_info = NULL;
7273 if (grouped_store)
7275 if (j == 0)
7276 result_chain.create (group_size);
7277 /* Permute. */
7278 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
7279 &result_chain);
7282 stmt_vec_info next_stmt_info = first_stmt_info;
7283 for (i = 0; i < vec_num; i++)
7285 unsigned misalign;
7286 unsigned HOST_WIDE_INT align;
7288 tree final_mask = NULL_TREE;
7289 if (loop_masks)
7290 final_mask = vect_get_loop_mask (gsi, loop_masks,
7291 vec_num * ncopies,
7292 vectype, vec_num * j + i);
7293 if (vec_mask)
7294 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7295 vec_mask, gsi);
7297 if (memory_access_type == VMAT_GATHER_SCATTER)
7299 tree scale = size_int (gs_info.scale);
7300 gcall *call;
7301 if (loop_masks)
7302 call = gimple_build_call_internal
7303 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7304 scale, vec_oprnd, final_mask);
7305 else
7306 call = gimple_build_call_internal
7307 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7308 scale, vec_oprnd);
7309 gimple_call_set_nothrow (call, true);
7310 new_stmt_info
7311 = vect_finish_stmt_generation (stmt_info, call, gsi);
7312 break;
7315 if (i > 0)
7316 /* Bump the vector pointer. */
7317 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7318 stmt_info, bump);
7320 if (slp)
7321 vec_oprnd = vec_oprnds[i];
7322 else if (grouped_store)
7323 /* For grouped stores vectorized defs are interleaved in
7324 vect_permute_store_chain(). */
7325 vec_oprnd = result_chain[i];
7327 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
7328 if (aligned_access_p (first_dr_info))
7329 misalign = 0;
7330 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7332 align = dr_alignment (vect_dr_behavior (first_dr_info));
7333 misalign = 0;
7335 else
7336 misalign = DR_MISALIGNMENT (first_dr_info);
7337 if (dataref_offset == NULL_TREE
7338 && TREE_CODE (dataref_ptr) == SSA_NAME)
7339 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7340 misalign);
7342 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7344 tree perm_mask = perm_mask_for_reverse (vectype);
7345 tree perm_dest = vect_create_destination_var
7346 (vect_get_store_rhs (stmt_info), vectype);
7347 tree new_temp = make_ssa_name (perm_dest);
7349 /* Generate the permute statement. */
7350 gimple *perm_stmt
7351 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7352 vec_oprnd, perm_mask);
7353 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7355 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7356 vec_oprnd = new_temp;
7359 /* Arguments are ready. Create the new vector stmt. */
7360 if (final_mask)
7362 align = least_bit_hwi (misalign | align);
7363 tree ptr = build_int_cst (ref_type, align);
7364 gcall *call
7365 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7366 dataref_ptr, ptr,
7367 final_mask, vec_oprnd);
7368 gimple_call_set_nothrow (call, true);
7369 new_stmt_info
7370 = vect_finish_stmt_generation (stmt_info, call, gsi);
7372 else
7374 data_ref = fold_build2 (MEM_REF, vectype,
7375 dataref_ptr,
7376 dataref_offset
7377 ? dataref_offset
7378 : build_int_cst (ref_type, 0));
7379 if (aligned_access_p (first_dr_info))
7381 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7382 TREE_TYPE (data_ref)
7383 = build_aligned_type (TREE_TYPE (data_ref),
7384 align * BITS_PER_UNIT);
7385 else
7386 TREE_TYPE (data_ref)
7387 = build_aligned_type (TREE_TYPE (data_ref),
7388 TYPE_ALIGN (elem_type));
7389 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7390 gassign *new_stmt
7391 = gimple_build_assign (data_ref, vec_oprnd);
7392 new_stmt_info
7393 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7396 if (slp)
7397 continue;
7399 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7400 if (!next_stmt_info)
7401 break;
7404 if (!slp)
7406 if (j == 0)
7407 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7408 else
7409 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7410 prev_stmt_info = new_stmt_info;
7414 oprnds.release ();
7415 result_chain.release ();
7416 vec_oprnds.release ();
7418 return true;
7421 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7422 VECTOR_CST mask. No checks are made that the target platform supports the
7423 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7424 vect_gen_perm_mask_checked. */
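/* A hypothetical usage sketch (mirroring what perm_mask_for_reverse does
   elsewhere in this file; not an addition to the API): to build a mask that
   reverses a four-element vector one could write

     vec_perm_builder sel (4, 4, 1);
     for (int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   assuming VECTYPE has exactly four elements and that the target supports
   the permutation (which vect_gen_perm_mask_checked asserts).  */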
7426 tree
7427 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7429 tree mask_type;
7431 poly_uint64 nunits = sel.length ();
7432 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7434 mask_type = build_vector_type (ssizetype, nunits);
7435 return vec_perm_indices_to_tree (mask_type, sel);
7438 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7439 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7441 tree
7442 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7444 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7445 return vect_gen_perm_mask_any (vectype, sel);
7448 /* Given vector variables X and Y that were generated for the scalar
7449 STMT_INFO, generate instructions to permute the vector elements of X and Y
7450 using permutation mask MASK_VEC, insert them at *GSI and return the
7451 permuted vector variable. */
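/* For instance (illustrative only): with X = {a0, a1, a2, a3},
   Y = {b0, b1, b2, b3} and MASK_VEC selecting {0, 4, 1, 5}, the function
   emits "tmp = VEC_PERM_EXPR <X, Y, {0, 4, 1, 5}>" at *GSI and returns the
   new SSA name holding {a0, b0, a1, b1}.  */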
7453 static tree
7454 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
7455 gimple_stmt_iterator *gsi)
7457 tree vectype = TREE_TYPE (x);
7458 tree perm_dest, data_ref;
7459 gimple *perm_stmt;
7461 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
7462 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
7463 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7464 else
7465 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7466 data_ref = make_ssa_name (perm_dest);
7468 /* Generate the permute statement. */
7469 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7470 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7472 return data_ref;
7475 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7476 inserting them on the loop's preheader edge. Returns true if we
7477 were successful in doing so (and thus STMT_INFO can then be moved),
7478 otherwise returns false. */
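/* An illustrative example (not taken from the original sources): for

     d_5 = c_6 * 4;        <-- defined inside LOOP, c_6 defined outside
     x_7 = array[d_5];     <-- STMT_INFO

   the definition of d_5 is moved onto the preheader edge so that STMT_INFO
   itself can then be hoisted; if d_5 were defined by a PHI, or depended on
   yet another in-loop definition, the function would return false.  */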
7480 static bool
7481 hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
7483 ssa_op_iter i;
7484 tree op;
7485 bool any = false;
7487 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7489 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7490 if (!gimple_nop_p (def_stmt)
7491 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7493 /* Make sure we don't need to recurse. While we could do
7494 so in simple cases, when there are more complex use webs
7495 we don't have an easy way to preserve stmt order to fulfil
7496 dependencies within them. */
7497 tree op2;
7498 ssa_op_iter i2;
7499 if (gimple_code (def_stmt) == GIMPLE_PHI)
7500 return false;
7501 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7503 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7504 if (!gimple_nop_p (def_stmt2)
7505 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7506 return false;
7508 any = true;
7512 if (!any)
7513 return true;
7515 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7517 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7518 if (!gimple_nop_p (def_stmt)
7519 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7521 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7522 gsi_remove (&gsi, false);
7523 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7527 return true;
7530 /* vectorizable_load.
7532 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
7533 that can be vectorized.
7534 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7535 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7536 Return true if STMT_INFO is vectorizable in this way. */
7538 static bool
7539 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7540 stmt_vec_info *vec_stmt, slp_tree slp_node,
7541 slp_instance slp_node_instance,
7542 stmt_vector_for_cost *cost_vec)
7544 tree scalar_dest;
7545 tree vec_dest = NULL;
7546 tree data_ref = NULL;
7547 stmt_vec_info prev_stmt_info;
7548 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7549 struct loop *loop = NULL;
7550 struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
7551 bool nested_in_vect_loop = false;
7552 tree elem_type;
7553 tree new_temp;
7554 machine_mode mode;
7555 tree dummy;
7556 enum dr_alignment_support alignment_support_scheme;
7557 tree dataref_ptr = NULL_TREE;
7558 tree dataref_offset = NULL_TREE;
7559 gimple *ptr_incr = NULL;
7560 int ncopies;
7561 int i, j;
7562 unsigned int group_size;
7563 poly_uint64 group_gap_adj;
7564 tree msq = NULL_TREE, lsq;
7565 tree offset = NULL_TREE;
7566 tree byte_offset = NULL_TREE;
7567 tree realignment_token = NULL_TREE;
7568 gphi *phi = NULL;
7569 vec<tree> dr_chain = vNULL;
7570 bool grouped_load = false;
7571 stmt_vec_info first_stmt_info;
7572 stmt_vec_info first_stmt_info_for_drptr = NULL;
7573 bool compute_in_loop = false;
7574 struct loop *at_loop;
7575 int vec_num;
7576 bool slp = (slp_node != NULL);
7577 bool slp_perm = false;
7578 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7579 poly_uint64 vf;
7580 tree aggr_type;
7581 gather_scatter_info gs_info;
7582 vec_info *vinfo = stmt_info->vinfo;
7583 tree ref_type;
7584 enum vect_def_type mask_dt = vect_unknown_def_type;
7586 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7587 return false;
7589 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7590 && ! vec_stmt)
7591 return false;
7593 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7594 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7596 scalar_dest = gimple_assign_lhs (assign);
7597 if (TREE_CODE (scalar_dest) != SSA_NAME)
7598 return false;
7600 tree_code code = gimple_assign_rhs_code (assign);
7601 if (code != ARRAY_REF
7602 && code != BIT_FIELD_REF
7603 && code != INDIRECT_REF
7604 && code != COMPONENT_REF
7605 && code != IMAGPART_EXPR
7606 && code != REALPART_EXPR
7607 && code != MEM_REF
7608 && TREE_CODE_CLASS (code) != tcc_declaration)
7609 return false;
7611 else
7613 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7614 if (!call || !gimple_call_internal_p (call))
7615 return false;
7617 internal_fn ifn = gimple_call_internal_fn (call);
7618 if (!internal_load_fn_p (ifn))
7619 return false;
7621 scalar_dest = gimple_call_lhs (call);
7622 if (!scalar_dest)
7623 return false;
7625 if (slp_node != NULL)
7627 if (dump_enabled_p ())
7628 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7629 "SLP of masked loads not supported.\n");
7630 return false;
7633 int mask_index = internal_fn_mask_index (ifn);
7634 if (mask_index >= 0)
7636 mask = gimple_call_arg (call, mask_index);
7637 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7638 &mask_vectype))
7639 return false;
7643 if (!STMT_VINFO_DATA_REF (stmt_info))
7644 return false;
7646 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7647 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7649 if (loop_vinfo)
7651 loop = LOOP_VINFO_LOOP (loop_vinfo);
7652 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
7653 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7655 else
7656 vf = 1;
7658 /* Multiple types in SLP are handled by creating the appropriate number of
7659 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7660 case of SLP. */
7661 if (slp)
7662 ncopies = 1;
7663 else
7664 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7666 gcc_assert (ncopies >= 1);
7668 /* FORNOW. This restriction should be relaxed. */
7669 if (nested_in_vect_loop && ncopies > 1)
7671 if (dump_enabled_p ())
7672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7673 "multiple types in nested loop.\n");
7674 return false;
7677 /* Invalidate assumptions made by dependence analysis when vectorization
7678 on the unrolled body effectively re-orders stmts. */
7679 if (ncopies > 1
7680 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7681 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7682 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7684 if (dump_enabled_p ())
7685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7686 "cannot perform implicit CSE when unrolling "
7687 "with negative dependence distance\n");
7688 return false;
7691 elem_type = TREE_TYPE (vectype);
7692 mode = TYPE_MODE (vectype);
7694 /* FORNOW. In some cases we can vectorize even if the data-type is not
7695 supported (e.g. data copies). */
7696 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7698 if (dump_enabled_p ())
7699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7700 "Aligned load, but unsupported type.\n");
7701 return false;
7704 /* Check if the load is a part of an interleaving chain. */
7705 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7707 grouped_load = true;
7708 /* FORNOW */
7709 gcc_assert (!nested_in_vect_loop);
7710 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7712 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7713 group_size = DR_GROUP_SIZE (first_stmt_info);
7715 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7716 slp_perm = true;
7718 /* Invalidate assumptions made by dependence analysis when vectorization
7719 on the unrolled body effectively re-orders stmts. */
7720 if (!PURE_SLP_STMT (stmt_info)
7721 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7722 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7723 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7725 if (dump_enabled_p ())
7726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7727 "cannot perform implicit CSE when performing "
7728 "group loads with negative dependence distance\n");
7729 return false;
7732 else
7733 group_size = 1;
7735 vect_memory_access_type memory_access_type;
7736 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
7737 &memory_access_type, &gs_info))
7738 return false;
7740 if (mask)
7742 if (memory_access_type == VMAT_CONTIGUOUS)
7744 machine_mode vec_mode = TYPE_MODE (vectype);
7745 if (!VECTOR_MODE_P (vec_mode)
7746 || !can_vec_mask_load_store_p (vec_mode,
7747 TYPE_MODE (mask_vectype), true))
7748 return false;
7750 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7751 && memory_access_type != VMAT_GATHER_SCATTER)
7753 if (dump_enabled_p ())
7754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7755 "unsupported access type for masked load.\n");
7756 return false;
7760 if (!vec_stmt) /* transformation not required. */
7762 if (!slp)
7763 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7765 if (loop_vinfo
7766 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7767 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7768 memory_access_type, &gs_info);
7770 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7771 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7772 slp_node_instance, slp_node, cost_vec);
7773 return true;
7776 if (!slp)
7777 gcc_assert (memory_access_type
7778 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7780 if (dump_enabled_p ())
7781 dump_printf_loc (MSG_NOTE, vect_location,
7782 "transform load. ncopies = %d\n", ncopies);
7784 /* Transform. */
7786 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7787 ensure_base_align (dr_info);
7789 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7791 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
7792 return true;
7795 if (memory_access_type == VMAT_INVARIANT)
7797 gcc_assert (!grouped_load && !mask && !bb_vinfo);
7798 /* If we have versioned for aliasing or the loop doesn't
7799 have any data dependencies that would preclude this,
7800 then we are sure this is a loop invariant load and
7801 thus we can insert it on the preheader edge. */
7802 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7803 && !nested_in_vect_loop
7804 && hoist_defs_of_uses (stmt_info, loop));
7805 if (hoist_p)
7807 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
7808 if (dump_enabled_p ())
7809 dump_printf_loc (MSG_NOTE, vect_location,
7810 "hoisting out of the vectorized loop: %G", stmt);
7811 scalar_dest = copy_ssa_name (scalar_dest);
7812 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
7813 gsi_insert_on_edge_immediate
7814 (loop_preheader_edge (loop),
7815 gimple_build_assign (scalar_dest, rhs));
7817 /* These copies are all equivalent, but currently the representation
7818 requires a separate STMT_VINFO_VEC_STMT for each one. */
7819 prev_stmt_info = NULL;
7820 gimple_stmt_iterator gsi2 = *gsi;
7821 gsi_next (&gsi2);
7822 for (j = 0; j < ncopies; j++)
7824 stmt_vec_info new_stmt_info;
7825 if (hoist_p)
7827 new_temp = vect_init_vector (stmt_info, scalar_dest,
7828 vectype, NULL);
7829 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
7830 new_stmt_info = vinfo->add_stmt (new_stmt);
7832 else
7834 new_temp = vect_init_vector (stmt_info, scalar_dest,
7835 vectype, &gsi2);
7836 new_stmt_info = vinfo->lookup_def (new_temp);
7838 if (slp)
7839 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7840 else if (j == 0)
7841 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7842 else
7843 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7844 prev_stmt_info = new_stmt_info;
7846 return true;
7849 if (memory_access_type == VMAT_ELEMENTWISE
7850 || memory_access_type == VMAT_STRIDED_SLP)
7852 gimple_stmt_iterator incr_gsi;
7853 bool insert_after;
7854 gimple *incr;
7855 tree offvar;
7856 tree ivstep;
7857 tree running_off;
7858 vec<constructor_elt, va_gc> *v = NULL;
7859 tree stride_base, stride_step, alias_off;
7860 /* Checked by get_load_store_type. */
7861 unsigned int const_nunits = nunits.to_constant ();
7862 unsigned HOST_WIDE_INT cst_offset = 0;
7864 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7865 gcc_assert (!nested_in_vect_loop);
7867 if (grouped_load)
7869 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7870 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7872 else
7874 first_stmt_info = stmt_info;
7875 first_dr_info = dr_info;
7877 if (slp && grouped_load)
7879 group_size = DR_GROUP_SIZE (first_stmt_info);
7880 ref_type = get_group_alias_ptr_type (first_stmt_info);
7882 else
7884 if (grouped_load)
7885 cst_offset
7886 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7887 * vect_get_place_in_interleaving_chain (stmt_info,
7888 first_stmt_info));
7889 group_size = 1;
7890 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7893 stride_base
7894 = fold_build_pointer_plus
7895 (DR_BASE_ADDRESS (first_dr_info->dr),
7896 size_binop (PLUS_EXPR,
7897 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7898 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7899 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7901 /* For a load with loop-invariant (but other than power-of-2)
7902 stride (i.e. not a grouped access) like so:
7904 for (i = 0; i < n; i += stride)
7905 ... = array[i];
7907 we generate a new induction variable and new accesses to
7908 form a new vector (or vectors, depending on ncopies):
7910 for (j = 0; ; j += VF*stride)
7911 tmp1 = array[j];
7912 tmp2 = array[j + stride];
7914 vectemp = {tmp1, tmp2, ...}
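	 As a concrete, purely illustrative instance: with a V4SI vectype
	 (assuming a vectorization factor of 4) and stride 3, each vector
	 iteration loads array[j], array[j + 3], array[j + 6] and
	 array[j + 9] and assembles them into one vector, while j advances
	 by VF * stride == 12 elements per iteration.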
7917 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7918 build_int_cst (TREE_TYPE (stride_step), vf));
7920 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7922 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7923 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7924 create_iv (stride_base, ivstep, NULL,
7925 loop, &incr_gsi, insert_after,
7926 &offvar, NULL);
7927 incr = gsi_stmt (incr_gsi);
7928 loop_vinfo->add_stmt (incr);
7930 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7932 prev_stmt_info = NULL;
7933 running_off = offvar;
7934 alias_off = build_int_cst (ref_type, 0);
7935 int nloads = const_nunits;
7936 int lnel = 1;
7937 tree ltype = TREE_TYPE (vectype);
7938 tree lvectype = vectype;
7939 auto_vec<tree> dr_chain;
7940 if (memory_access_type == VMAT_STRIDED_SLP)
7942 if (group_size < const_nunits)
7944 /* First check if vec_init optab supports construction from
7945 vector elts directly. */
7946 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7947 machine_mode vmode;
7948 if (mode_for_vector (elmode, group_size).exists (&vmode)
7949 && VECTOR_MODE_P (vmode)
7950 && targetm.vector_mode_supported_p (vmode)
7951 && (convert_optab_handler (vec_init_optab,
7952 TYPE_MODE (vectype), vmode)
7953 != CODE_FOR_nothing))
7955 nloads = const_nunits / group_size;
7956 lnel = group_size;
7957 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7959 else
7961 /* Otherwise avoid emitting a constructor of vector elements
7962 by performing the loads using an integer type of the same
7963 size, constructing a vector of those and then
7964 re-interpreting it as the original vector type.
7965 This avoids a huge runtime penalty due to the general
7966 inability to perform store forwarding from smaller stores
7967 to a larger load. */
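	      /* For example (an illustrative note, not in the original
		 comment): with a V8QI vectype and group_size 2, each pair of
		 adjacent chars is loaded as one 16-bit integer, four such
		 integers form the intermediate vector, and the result is
		 later VIEW_CONVERTed back to V8QI, avoiding an 8-element
		 constructor.  */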
7968 unsigned lsize
7969 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7970 unsigned int lnunits = const_nunits / group_size;
7971 /* If we can't construct such a vector fall back to
7972 element loads of the original vector type. */
7973 if (int_mode_for_size (lsize, 0).exists (&elmode)
7974 && mode_for_vector (elmode, lnunits).exists (&vmode)
7975 && VECTOR_MODE_P (vmode)
7976 && targetm.vector_mode_supported_p (vmode)
7977 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7978 != CODE_FOR_nothing))
7980 nloads = lnunits;
7981 lnel = group_size;
7982 ltype = build_nonstandard_integer_type (lsize, 1);
7983 lvectype = build_vector_type (ltype, nloads);
7987 else
7989 nloads = 1;
7990 lnel = const_nunits;
7991 ltype = vectype;
7993 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7995 /* Load vector(1) scalar_type if the vectype is a single-element vector. */
7996 else if (nloads == 1)
7997 ltype = vectype;
7999 if (slp)
8001 /* For SLP permutation support we need to load the whole group,
8002 not only the number of vector stmts the permutation result
8003 fits in. */
8004 if (slp_perm)
8006 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8007 variable VF. */
8008 unsigned int const_vf = vf.to_constant ();
8009 ncopies = CEIL (group_size * const_vf, const_nunits);
8010 dr_chain.create (ncopies);
8012 else
8013 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8015 unsigned int group_el = 0;
8016 unsigned HOST_WIDE_INT
8017 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8018 for (j = 0; j < ncopies; j++)
8020 if (nloads > 1)
8021 vec_alloc (v, nloads);
8022 stmt_vec_info new_stmt_info = NULL;
8023 for (i = 0; i < nloads; i++)
8025 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8026 group_el * elsz + cst_offset);
8027 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8028 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8029 gassign *new_stmt
8030 = gimple_build_assign (make_ssa_name (ltype), data_ref);
8031 new_stmt_info
8032 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8033 if (nloads > 1)
8034 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8035 gimple_assign_lhs (new_stmt));
8037 group_el += lnel;
8038 if (! slp
8039 || group_el == group_size)
8041 tree newoff = copy_ssa_name (running_off);
8042 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8043 running_off, stride_step);
8044 vect_finish_stmt_generation (stmt_info, incr, gsi);
8046 running_off = newoff;
8047 group_el = 0;
8050 if (nloads > 1)
8052 tree vec_inv = build_constructor (lvectype, v);
8053 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
8054 new_stmt_info = vinfo->lookup_def (new_temp);
8055 if (lvectype != vectype)
8057 gassign *new_stmt
8058 = gimple_build_assign (make_ssa_name (vectype),
8059 VIEW_CONVERT_EXPR,
8060 build1 (VIEW_CONVERT_EXPR,
8061 vectype, new_temp));
8062 new_stmt_info
8063 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8067 if (slp)
8069 if (slp_perm)
8070 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
8071 else
8072 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8074 else
8076 if (j == 0)
8077 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8078 else
8079 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8080 prev_stmt_info = new_stmt_info;
8083 if (slp_perm)
8085 unsigned n_perms;
8086 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8087 slp_node_instance, false, &n_perms);
8089 return true;
8092 if (memory_access_type == VMAT_GATHER_SCATTER
8093 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8094 grouped_load = false;
8096 if (grouped_load)
8098 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8099 group_size = DR_GROUP_SIZE (first_stmt_info);
8100 /* For SLP vectorization we directly vectorize a subchain
8101 without permutation. */
8102 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8103 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8104 /* For BB vectorization always use the first stmt to base
8105 the data ref pointer on. */
8106 if (bb_vinfo)
8107 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8109 /* Check if the chain of loads is already vectorized. */
8110 if (STMT_VINFO_VEC_STMT (first_stmt_info)
8111 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8112 ??? But we can only do so if there is exactly one
8113 as we have no way to get at the rest. Leave the CSE
8114 opportunity alone.
8115 ??? With the group load eventually participating
8116 in multiple different permutations (having multiple
8117 slp nodes which refer to the same group) the CSE
8118 would even produce wrong code. See PR56270. */
8119 && !slp)
8121 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8122 return true;
8124 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8125 group_gap_adj = 0;
8127 /* VEC_NUM is the number of vect stmts to be created for this group. */
8128 if (slp)
8130 grouped_load = false;
8131 /* If an SLP permutation is from N elements to N elements,
8132 and if one vector holds a whole number of N, we can load
8133 the inputs to the permutation in the same way as an
8134 unpermuted sequence. In other cases we need to load the
8135 whole group, not only the number of vector stmts the
8136 permutation result fits in. */
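	  /* Purely illustrative example (not from the original comment):
	     with V8SI vectors, a permutation within a group of 4 loads keeps
	     two whole groups per vector, so the unpermuted loads can feed the
	     permutation directly; with a group of 3 the group size does not
	     divide the vector length and the whole group must be loaded
	     (assuming the SLP instance covers the complete group in both
	     cases).  */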
8137 if (slp_perm
8138 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
8139 || !multiple_p (nunits, group_size)))
8141 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8142 variable VF; see vect_transform_slp_perm_load. */
8143 unsigned int const_vf = vf.to_constant ();
8144 unsigned int const_nunits = nunits.to_constant ();
8145 vec_num = CEIL (group_size * const_vf, const_nunits);
8146 group_gap_adj = vf * group_size - nunits * vec_num;
8148 else
8150 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8151 group_gap_adj
8152 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
8155 else
8156 vec_num = group_size;
8158 ref_type = get_group_alias_ptr_type (first_stmt_info);
8160 else
8162 first_stmt_info = stmt_info;
8163 first_dr_info = dr_info;
8164 group_size = vec_num = 1;
8165 group_gap_adj = 0;
8166 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8169 alignment_support_scheme
8170 = vect_supportable_dr_alignment (first_dr_info, false);
8171 gcc_assert (alignment_support_scheme);
8172 vec_loop_masks *loop_masks
8173 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8174 ? &LOOP_VINFO_MASKS (loop_vinfo)
8175 : NULL);
8176 /* Targets with load-lane instructions must not require explicit
8177 realignment. vect_supportable_dr_alignment always returns either
8178 dr_aligned or dr_unaligned_supported for masked operations. */
8179 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8180 && !mask
8181 && !loop_masks)
8182 || alignment_support_scheme == dr_aligned
8183 || alignment_support_scheme == dr_unaligned_supported);
8185 /* In case the vectorization factor (VF) is bigger than the number
8186 of elements that we can fit in a vectype (nunits), we have to generate
8187 more than one vector stmt - i.e - we need to "unroll" the
8188 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8189 from one copy of the vector stmt to the next, in the field
8190 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8191 stages to find the correct vector defs to be used when vectorizing
8192 stmts that use the defs of the current stmt. The example below
8193 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8194 need to create 4 vectorized stmts):
8196 before vectorization:
8197 RELATED_STMT VEC_STMT
8198 S1: x = memref - -
8199 S2: z = x + 1 - -
8201 step 1: vectorize stmt S1:
8202 We first create the vector stmt VS1_0, and, as usual, record a
8203 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8204 Next, we create the vector stmt VS1_1, and record a pointer to
8205 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8206 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8207 stmts and pointers:
8208 RELATED_STMT VEC_STMT
8209 VS1_0: vx0 = memref0 VS1_1 -
8210 VS1_1: vx1 = memref1 VS1_2 -
8211 VS1_2: vx2 = memref2 VS1_3 -
8212 VS1_3: vx3 = memref3 - -
8213 S1: x = load - VS1_0
8214 S2: z = x + 1 - -
8216 See in documentation in vect_get_vec_def_for_stmt_copy for how the
8217 information we recorded in RELATED_STMT field is used to vectorize
8218 stmt S2. */
8220 /* In case of interleaving (non-unit grouped access):
8222 S1: x2 = &base + 2
8223 S2: x0 = &base
8224 S3: x1 = &base + 1
8225 S4: x3 = &base + 3
8227 Vectorized loads are created in the order of memory accesses
8228 starting from the access of the first stmt of the chain:
8230 VS1: vx0 = &base
8231 VS2: vx1 = &base + vec_size*1
8232 VS3: vx3 = &base + vec_size*2
8233 VS4: vx4 = &base + vec_size*3
8235 Then permutation statements are generated:
8237 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8238 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8241 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8242 (the order of the data-refs in the output of vect_permute_load_chain
8243 corresponds to the order of scalar stmts in the interleaving chain - see
8244 the documentation of vect_permute_load_chain()).
8245 The generation of permutation stmts and recording them in
8246 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8248 In case of both multiple types and interleaving, the vector loads and
8249 permutation stmts above are created for every copy. The result vector
8250 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8251 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8253 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8254 on a target that supports unaligned accesses (dr_unaligned_supported)
8255 we generate the following code:
8256 p = initial_addr;
8257 indx = 0;
8258 loop {
8259 p = p + indx * vectype_size;
8260 vec_dest = *(p);
8261 indx = indx + 1;
8264 Otherwise, the data reference is potentially unaligned on a target that
8265 does not support unaligned accesses (dr_explicit_realign_optimized) -
8266 then generate the following code, in which the data in each iteration is
8267 obtained by two vector loads, one from the previous iteration, and one
8268 from the current iteration:
8269 p1 = initial_addr;
8270 msq_init = *(floor(p1))
8271 p2 = initial_addr + VS - 1;
8272 realignment_token = call target_builtin;
8273 indx = 0;
8274 loop {
8275 p2 = p2 + indx * vectype_size
8276 lsq = *(floor(p2))
8277 vec_dest = realign_load (msq, lsq, realignment_token)
8278 indx = indx + 1;
8279 msq = lsq;
8280 } */
8282 /* If the misalignment remains the same throughout the execution of the
8283 loop, we can create the init_addr and permutation mask at the loop
8284 preheader. Otherwise, it needs to be created inside the loop.
8285 This can only occur when vectorizing memory accesses in the inner-loop
8286 nested within an outer-loop that is being vectorized. */
8288 if (nested_in_vect_loop
8289 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
8290 GET_MODE_SIZE (TYPE_MODE (vectype))))
8292 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8293 compute_in_loop = true;
8296 if ((alignment_support_scheme == dr_explicit_realign_optimized
8297 || alignment_support_scheme == dr_explicit_realign)
8298 && !compute_in_loop)
8300 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
8301 alignment_support_scheme, NULL_TREE,
8302 &at_loop);
8303 if (alignment_support_scheme == dr_explicit_realign_optimized)
8305 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8306 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8307 size_one_node);
8310 else
8311 at_loop = loop;
8313 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8314 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8316 tree bump;
8317 tree vec_offset = NULL_TREE;
8318 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8320 aggr_type = NULL_TREE;
8321 bump = NULL_TREE;
8323 else if (memory_access_type == VMAT_GATHER_SCATTER)
8325 aggr_type = elem_type;
8326 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8327 &bump, &vec_offset);
8329 else
8331 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8332 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8333 else
8334 aggr_type = vectype;
8335 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8336 memory_access_type);
8339 tree vec_mask = NULL_TREE;
8340 prev_stmt_info = NULL;
8341 poly_uint64 group_elt = 0;
8342 for (j = 0; j < ncopies; j++)
8344 stmt_vec_info new_stmt_info = NULL;
8345 /* 1. Create the vector or array pointer update chain. */
8346 if (j == 0)
8348 bool simd_lane_access_p
8349 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8350 if (simd_lane_access_p
8351 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8352 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8353 && integer_zerop (DR_OFFSET (first_dr_info->dr))
8354 && integer_zerop (DR_INIT (first_dr_info->dr))
8355 && alias_sets_conflict_p (get_alias_set (aggr_type),
8356 get_alias_set (TREE_TYPE (ref_type)))
8357 && (alignment_support_scheme == dr_aligned
8358 || alignment_support_scheme == dr_unaligned_supported))
8360 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8361 dataref_offset = build_int_cst (ref_type, 0);
8363 else if (first_stmt_info_for_drptr
8364 && first_stmt_info != first_stmt_info_for_drptr)
8366 dataref_ptr
8367 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8368 aggr_type, at_loop, offset, &dummy,
8369 gsi, &ptr_incr, simd_lane_access_p,
8370 byte_offset, bump);
8371 /* Adjust the pointer by the difference to first_stmt. */
8372 data_reference_p ptrdr
8373 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
8374 tree diff
8375 = fold_convert (sizetype,
8376 size_binop (MINUS_EXPR,
8377 DR_INIT (first_dr_info->dr),
8378 DR_INIT (ptrdr)));
8379 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8380 stmt_info, diff);
8382 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8383 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8384 &dataref_ptr, &vec_offset);
8385 else
8386 dataref_ptr
8387 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
8388 offset, &dummy, gsi, &ptr_incr,
8389 simd_lane_access_p,
8390 byte_offset, bump);
8391 if (mask)
8392 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8393 mask_vectype);
8395 else
8397 if (dataref_offset)
8398 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8399 bump);
8400 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8401 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8402 else
8403 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8404 stmt_info, bump);
8405 if (mask)
8406 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8409 if (grouped_load || slp_perm)
8410 dr_chain.create (vec_num);
8412 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8414 tree vec_array;
8416 vec_array = create_vector_array (vectype, vec_num);
8418 tree final_mask = NULL_TREE;
8419 if (loop_masks)
8420 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8421 vectype, j);
8422 if (vec_mask)
8423 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8424 vec_mask, gsi);
8426 gcall *call;
8427 if (final_mask)
8429 /* Emit:
8430 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8431 VEC_MASK). */
8432 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8433 tree alias_ptr = build_int_cst (ref_type, align);
8434 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8435 dataref_ptr, alias_ptr,
8436 final_mask);
8438 else
8440 /* Emit:
8441 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8442 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8443 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8445 gimple_call_set_lhs (call, vec_array);
8446 gimple_call_set_nothrow (call, true);
8447 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8449 /* Extract each vector into an SSA_NAME. */
8450 for (i = 0; i < vec_num; i++)
8452 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
8453 vec_array, i);
8454 dr_chain.quick_push (new_temp);
8457 /* Record the mapping between SSA_NAMEs and statements. */
8458 vect_record_grouped_load_vectors (stmt_info, dr_chain);
8460 /* Record that VEC_ARRAY is now dead. */
8461 vect_clobber_variable (stmt_info, gsi, vec_array);
8463 else
8465 for (i = 0; i < vec_num; i++)
8467 tree final_mask = NULL_TREE;
8468 if (loop_masks
8469 && memory_access_type != VMAT_INVARIANT)
8470 final_mask = vect_get_loop_mask (gsi, loop_masks,
8471 vec_num * ncopies,
8472 vectype, vec_num * j + i);
8473 if (vec_mask)
8474 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8475 vec_mask, gsi);
8477 if (i > 0)
8478 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8479 stmt_info, bump);
8481 /* 2. Create the vector-load in the loop. */
8482 gimple *new_stmt = NULL;
8483 switch (alignment_support_scheme)
8485 case dr_aligned:
8486 case dr_unaligned_supported:
8488 unsigned int misalign;
8489 unsigned HOST_WIDE_INT align;
8491 if (memory_access_type == VMAT_GATHER_SCATTER)
8493 tree scale = size_int (gs_info.scale);
8494 gcall *call;
8495 if (loop_masks)
8496 call = gimple_build_call_internal
8497 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8498 vec_offset, scale, final_mask);
8499 else
8500 call = gimple_build_call_internal
8501 (IFN_GATHER_LOAD, 3, dataref_ptr,
8502 vec_offset, scale);
8503 gimple_call_set_nothrow (call, true);
8504 new_stmt = call;
8505 data_ref = NULL_TREE;
8506 break;
8509 align =
8510 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8511 if (alignment_support_scheme == dr_aligned)
8513 gcc_assert (aligned_access_p (first_dr_info));
8514 misalign = 0;
8516 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8518 align = dr_alignment
8519 (vect_dr_behavior (first_dr_info));
8520 misalign = 0;
8522 else
8523 misalign = DR_MISALIGNMENT (first_dr_info);
8524 if (dataref_offset == NULL_TREE
8525 && TREE_CODE (dataref_ptr) == SSA_NAME)
8526 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8527 align, misalign);
8529 if (final_mask)
8531 align = least_bit_hwi (misalign | align);
8532 tree ptr = build_int_cst (ref_type, align);
8533 gcall *call
8534 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8535 dataref_ptr, ptr,
8536 final_mask);
8537 gimple_call_set_nothrow (call, true);
8538 new_stmt = call;
8539 data_ref = NULL_TREE;
8541 else
8543 tree ltype = vectype;
8544 /* If there's no peeling for gaps but we have a gap
8545 with slp loads then load the lower half of the
8546 vector only. See get_group_load_store_type for
8547 when we apply this optimization. */
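		      /* Illustrative case (not part of the original comment):
			 a V4SI group of size 4 with DR_GROUP_GAP == 2 loads
			 only a two-element vector here; the constructor built
			 below pads the upper half with zeros to recreate a
			 full V4SI value.  */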
8548 if (slp
8549 && loop_vinfo
8550 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
8551 && DR_GROUP_GAP (first_stmt_info) != 0
8552 && known_eq (nunits,
8553 (group_size
8554 - DR_GROUP_GAP (first_stmt_info)) * 2)
8555 && known_eq (nunits, group_size))
8556 ltype = build_vector_type (TREE_TYPE (vectype),
8557 (group_size
8558 - DR_GROUP_GAP
8559 (first_stmt_info)));
8560 data_ref
8561 = fold_build2 (MEM_REF, ltype, dataref_ptr,
8562 dataref_offset
8563 ? dataref_offset
8564 : build_int_cst (ref_type, 0));
8565 if (alignment_support_scheme == dr_aligned)
8567 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8568 TREE_TYPE (data_ref)
8569 = build_aligned_type (TREE_TYPE (data_ref),
8570 align * BITS_PER_UNIT);
8571 else
8572 TREE_TYPE (data_ref)
8573 = build_aligned_type (TREE_TYPE (data_ref),
8574 TYPE_ALIGN (elem_type));
8575 if (ltype != vectype)
8577 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8578 tree tem = make_ssa_name (ltype);
8579 new_stmt = gimple_build_assign (tem, data_ref);
8580 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8581 data_ref = NULL;
8582 vec<constructor_elt, va_gc> *v;
8583 vec_alloc (v, 2);
8584 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
8585 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8586 build_zero_cst (ltype));
8587 new_stmt
8588 = gimple_build_assign (vec_dest,
8589 build_constructor
8590 (vectype, v));
8593 break;
8595 case dr_explicit_realign:
8597 tree ptr, bump;
8599 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8601 if (compute_in_loop)
8602 msq = vect_setup_realignment (first_stmt_info, gsi,
8603 &realignment_token,
8604 dr_explicit_realign,
8605 dataref_ptr, NULL);
8607 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8608 ptr = copy_ssa_name (dataref_ptr);
8609 else
8610 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8611 // For explicit realign the target alignment should be
8612 // known at compile time.
8613 unsigned HOST_WIDE_INT align =
8614 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8615 new_stmt = gimple_build_assign
8616 (ptr, BIT_AND_EXPR, dataref_ptr,
8617 build_int_cst
8618 (TREE_TYPE (dataref_ptr),
8619 -(HOST_WIDE_INT) align));
8620 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8621 data_ref
8622 = build2 (MEM_REF, vectype, ptr,
8623 build_int_cst (ref_type, 0));
8624 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8625 vec_dest = vect_create_destination_var (scalar_dest,
8626 vectype);
8627 new_stmt = gimple_build_assign (vec_dest, data_ref);
8628 new_temp = make_ssa_name (vec_dest, new_stmt);
8629 gimple_assign_set_lhs (new_stmt, new_temp);
8630 gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
8631 gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
8632 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8633 msq = new_temp;
8635 bump = size_binop (MULT_EXPR, vs,
8636 TYPE_SIZE_UNIT (elem_type));
8637 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8638 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
8639 stmt_info, bump);
8640 new_stmt = gimple_build_assign
8641 (NULL_TREE, BIT_AND_EXPR, ptr,
8642 build_int_cst
8643 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8644 ptr = copy_ssa_name (ptr, new_stmt);
8645 gimple_assign_set_lhs (new_stmt, ptr);
8646 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8647 data_ref
8648 = build2 (MEM_REF, vectype, ptr,
8649 build_int_cst (ref_type, 0));
8650 break;
8652 case dr_explicit_realign_optimized:
8654 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8655 new_temp = copy_ssa_name (dataref_ptr);
8656 else
8657 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8658 // We should only be doing this if we know the target
8659 // alignment at compile time.
8660 unsigned HOST_WIDE_INT align =
8661 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8662 new_stmt = gimple_build_assign
8663 (new_temp, BIT_AND_EXPR, dataref_ptr,
8664 build_int_cst (TREE_TYPE (dataref_ptr),
8665 -(HOST_WIDE_INT) align));
8666 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8667 data_ref
8668 = build2 (MEM_REF, vectype, new_temp,
8669 build_int_cst (ref_type, 0));
8670 break;
8672 default:
8673 gcc_unreachable ();
8675 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8676 /* DATA_REF is null if we've already built the statement. */
8677 if (data_ref)
8679 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8680 new_stmt = gimple_build_assign (vec_dest, data_ref);
8682 new_temp = make_ssa_name (vec_dest, new_stmt);
8683 gimple_set_lhs (new_stmt, new_temp);
8684 new_stmt_info
8685 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8687 /* 3. Handle explicit realignment if necessary/supported.
8688 Create in loop:
8689 vec_dest = realign_load (msq, lsq, realignment_token) */
8690 if (alignment_support_scheme == dr_explicit_realign_optimized
8691 || alignment_support_scheme == dr_explicit_realign)
8693 lsq = gimple_assign_lhs (new_stmt);
8694 if (!realignment_token)
8695 realignment_token = dataref_ptr;
8696 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8697 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8698 msq, lsq, realignment_token);
8699 new_temp = make_ssa_name (vec_dest, new_stmt);
8700 gimple_assign_set_lhs (new_stmt, new_temp);
8701 new_stmt_info
8702 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8704 if (alignment_support_scheme == dr_explicit_realign_optimized)
8706 gcc_assert (phi);
8707 if (i == vec_num - 1 && j == ncopies - 1)
8708 add_phi_arg (phi, lsq,
8709 loop_latch_edge (containing_loop),
8710 UNKNOWN_LOCATION);
8711 msq = lsq;
8715 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8717 tree perm_mask = perm_mask_for_reverse (vectype);
8718 new_temp = permute_vec_elements (new_temp, new_temp,
8719 perm_mask, stmt_info, gsi);
8720 new_stmt_info = vinfo->lookup_def (new_temp);
8723 /* Collect vector loads and later create their permutation in
8724 vect_transform_grouped_load (). */
8725 if (grouped_load || slp_perm)
8726 dr_chain.quick_push (new_temp);
8728 /* Store vector loads in the corresponding SLP_NODE. */
8729 if (slp && !slp_perm)
8730 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8732 /* With SLP permutation we load the gaps as well; without
8733 it we need to skip the gaps after we manage to fully load
8734 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8735 group_elt += nunits;
8736 if (maybe_ne (group_gap_adj, 0U)
8737 && !slp_perm
8738 && known_eq (group_elt, group_size - group_gap_adj))
8740 poly_wide_int bump_val
8741 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8742 * group_gap_adj);
8743 tree bump = wide_int_to_tree (sizetype, bump_val);
8744 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8745 stmt_info, bump);
8746 group_elt = 0;
8749 /* Bump the vector pointer to account for a gap or for excess
8750 elements loaded for a permuted SLP load. */
8751 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8753 poly_wide_int bump_val
8754 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8755 * group_gap_adj);
8756 tree bump = wide_int_to_tree (sizetype, bump_val);
8757 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8758 stmt_info, bump);
8762 if (slp && !slp_perm)
8763 continue;
8765 if (slp_perm)
8767 unsigned n_perms;
8768 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8769 slp_node_instance, false,
8770 &n_perms))
8772 dr_chain.release ();
8773 return false;
8776 else
8778 if (grouped_load)
8780 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8781 vect_transform_grouped_load (stmt_info, dr_chain,
8782 group_size, gsi);
8783 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8785 else
8787 if (j == 0)
8788 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8789 else
8790 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8791 prev_stmt_info = new_stmt_info;
8794 dr_chain.release ();
8797 return true;
8800 /* Function vect_is_simple_cond.
8802 Input:
8803 LOOP - the loop that is being vectorized.
8804 COND - Condition that is checked for simple use.
8806 Output:
8807 *COMP_VECTYPE - the vector type for the comparison.
8808 *DTS - The def types for the arguments of the comparison.
8810 Returns whether a COND can be vectorized. Checks whether
8811 condition operands are supportable using vect_is_simple_use. */
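/* As an illustrative note (not part of the original comment): for a COND
   such as "x_1 < y_2" the vector type of the operands becomes *COMP_VECTYPE;
   if both operands are invariant, *COMP_VECTYPE is derived from their scalar
   type, possibly widened to match VECTYPE.  A mask COND that is a single
   boolean SSA name must already have a vector boolean type.  */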
8813 static bool
8814 vect_is_simple_cond (tree cond, vec_info *vinfo,
8815 tree *comp_vectype, enum vect_def_type *dts,
8816 tree vectype)
8818 tree lhs, rhs;
8819 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8821 /* Mask case. */
8822 if (TREE_CODE (cond) == SSA_NAME
8823 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8825 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8826 || !*comp_vectype
8827 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8828 return false;
8829 return true;
8832 if (!COMPARISON_CLASS_P (cond))
8833 return false;
8835 lhs = TREE_OPERAND (cond, 0);
8836 rhs = TREE_OPERAND (cond, 1);
8838 if (TREE_CODE (lhs) == SSA_NAME)
8840 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8841 return false;
8843 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8844 || TREE_CODE (lhs) == FIXED_CST)
8845 dts[0] = vect_constant_def;
8846 else
8847 return false;
8849 if (TREE_CODE (rhs) == SSA_NAME)
8851 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8852 return false;
8854 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8855 || TREE_CODE (rhs) == FIXED_CST)
8856 dts[1] = vect_constant_def;
8857 else
8858 return false;
8860 if (vectype1 && vectype2
8861 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8862 TYPE_VECTOR_SUBPARTS (vectype2)))
8863 return false;
8865 *comp_vectype = vectype1 ? vectype1 : vectype2;
8866 /* Invariant comparison. */
8867 if (! *comp_vectype)
8869 tree scalar_type = TREE_TYPE (lhs);
8870 /* If we can widen the comparison to match vectype do so. */
8871 if (INTEGRAL_TYPE_P (scalar_type)
8872 && vectype
8873 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8874 TYPE_SIZE (TREE_TYPE (vectype))))
8875 scalar_type = build_nonstandard_integer_type
8876 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8877 TYPE_UNSIGNED (scalar_type));
8878 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8881 return true;
8884 /* vectorizable_condition.
8886 Check if STMT_INFO is a conditional modify expression that can be vectorized.
8887 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8888 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8889 at GSI.
8891 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
8893 Return true if STMT_INFO is vectorizable in this way. */
8895 bool
8896 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8897 stmt_vec_info *vec_stmt, bool for_reduction,
8898 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
8900 vec_info *vinfo = stmt_info->vinfo;
8901 tree scalar_dest = NULL_TREE;
8902 tree vec_dest = NULL_TREE;
8903 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8904 tree then_clause, else_clause;
8905 tree comp_vectype = NULL_TREE;
8906 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8907 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8908 tree vec_compare;
8909 tree new_temp;
8910 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8911 enum vect_def_type dts[4]
8912 = {vect_unknown_def_type, vect_unknown_def_type,
8913 vect_unknown_def_type, vect_unknown_def_type};
8914 int ndts = 4;
8915 int ncopies;
8916 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8917 stmt_vec_info prev_stmt_info = NULL;
8918 int i, j;
8919 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8920 vec<tree> vec_oprnds0 = vNULL;
8921 vec<tree> vec_oprnds1 = vNULL;
8922 vec<tree> vec_oprnds2 = vNULL;
8923 vec<tree> vec_oprnds3 = vNULL;
8924 tree vec_cmp_type;
8925 bool masked = false;
8927 if (for_reduction && STMT_SLP_TYPE (stmt_info))
8928 return false;
8930 vect_reduction_type reduction_type
8931 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8932 if (reduction_type == TREE_CODE_REDUCTION)
8934 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8935 return false;
8937 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8938 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8939 && for_reduction))
8940 return false;
8942 /* FORNOW: not yet supported. */
8943 if (STMT_VINFO_LIVE_P (stmt_info))
8945 if (dump_enabled_p ())
8946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8947 "value used after loop.\n");
8948 return false;
8952 /* Is vectorizable conditional operation? */
8953 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
8954 if (!stmt)
8955 return false;
8957 code = gimple_assign_rhs_code (stmt);
8959 if (code != COND_EXPR)
8960 return false;
8962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8963 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8965 if (slp_node)
8966 ncopies = 1;
8967 else
8968 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8970 gcc_assert (ncopies >= 1);
8971 if (for_reduction && ncopies > 1)
8972 return false; /* FORNOW */
8974 cond_expr = gimple_assign_rhs1 (stmt);
8975 then_clause = gimple_assign_rhs2 (stmt);
8976 else_clause = gimple_assign_rhs3 (stmt);
8978 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8979 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8980 || !comp_vectype)
8981 return false;
8983 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8984 return false;
8985 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8986 return false;
8988 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8989 return false;
8991 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8992 return false;
8994 masked = !COMPARISON_CLASS_P (cond_expr);
8995 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8997 if (vec_cmp_type == NULL_TREE)
8998 return false;
9000 cond_code = TREE_CODE (cond_expr);
9001 if (!masked)
9003 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9004 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9007 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
9009 /* Boolean values may have another representation in vectors
9010 and therefore we prefer bit operations over comparison for
9011 them (which also works for scalar masks). We store opcodes
9012 to use in bitop1 and bitop2. Statement is vectorized as
9013 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9014 depending on bitop1 and bitop2 arity. */
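/* Illustrative example (not from the original source): for boolean mask
   operands A and B, cond_code GT_EXPR is rewritten as A & ~B, i.e.
   bitop1 = BIT_NOT_EXPR is applied to the second operand and
   bitop2 = BIT_AND_EXPR combines the result with the first.  */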
9015 switch (cond_code)
9017 case GT_EXPR:
9018 bitop1 = BIT_NOT_EXPR;
9019 bitop2 = BIT_AND_EXPR;
9020 break;
9021 case GE_EXPR:
9022 bitop1 = BIT_NOT_EXPR;
9023 bitop2 = BIT_IOR_EXPR;
9024 break;
9025 case LT_EXPR:
9026 bitop1 = BIT_NOT_EXPR;
9027 bitop2 = BIT_AND_EXPR;
9028 std::swap (cond_expr0, cond_expr1);
9029 break;
9030 case LE_EXPR:
9031 bitop1 = BIT_NOT_EXPR;
9032 bitop2 = BIT_IOR_EXPR;
9033 std::swap (cond_expr0, cond_expr1);
9034 break;
9035 case NE_EXPR:
9036 bitop1 = BIT_XOR_EXPR;
9037 break;
9038 case EQ_EXPR:
9039 bitop1 = BIT_XOR_EXPR;
9040 bitop2 = BIT_NOT_EXPR;
9041 break;
9042 default:
9043 return false;
9045 cond_code = SSA_NAME;
9048 if (!vec_stmt)
9050 if (bitop1 != NOP_EXPR)
9052 machine_mode mode = TYPE_MODE (comp_vectype);
9053 optab optab;
9055 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
9056 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9057 return false;
9059 if (bitop2 != NOP_EXPR)
9061 optab = optab_for_tree_code (bitop2, comp_vectype,
9062 optab_default);
9063 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9064 return false;
9067 if (expand_vec_cond_expr_p (vectype, comp_vectype,
9068 cond_code))
9070 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
9071 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
9072 cost_vec);
9073 return true;
9075 return false;
9078 /* Transform. */
9080 if (!slp_node)
9082 vec_oprnds0.create (1);
9083 vec_oprnds1.create (1);
9084 vec_oprnds2.create (1);
9085 vec_oprnds3.create (1);
9088 /* Handle def. */
9089 scalar_dest = gimple_assign_lhs (stmt);
9090 if (reduction_type != EXTRACT_LAST_REDUCTION)
9091 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9093 /* Handle cond expr. */
9094 for (j = 0; j < ncopies; j++)
9096 stmt_vec_info new_stmt_info = NULL;
9097 if (j == 0)
9099 if (slp_node)
9101 auto_vec<tree, 4> ops;
9102 auto_vec<vec<tree>, 4> vec_defs;
9104 if (masked)
9105 ops.safe_push (cond_expr);
9106 else
9108 ops.safe_push (cond_expr0);
9109 ops.safe_push (cond_expr1);
9111 ops.safe_push (then_clause);
9112 ops.safe_push (else_clause);
9113 vect_get_slp_defs (ops, slp_node, &vec_defs);
9114 vec_oprnds3 = vec_defs.pop ();
9115 vec_oprnds2 = vec_defs.pop ();
9116 if (!masked)
9117 vec_oprnds1 = vec_defs.pop ();
9118 vec_oprnds0 = vec_defs.pop ();
9120 else
9122 if (masked)
9124 vec_cond_lhs
9125 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
9126 comp_vectype);
9128 else
9130 vec_cond_lhs
9131 = vect_get_vec_def_for_operand (cond_expr0,
9132 stmt_info, comp_vectype);
9133 vec_cond_rhs
9134 = vect_get_vec_def_for_operand (cond_expr1,
9135 stmt_info, comp_vectype);
9137 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
9138 stmt_info);
9139 if (reduction_type != EXTRACT_LAST_REDUCTION)
9140 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
9141 stmt_info);
9144 else
9146 vec_cond_lhs
9147 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
9148 if (!masked)
9149 vec_cond_rhs
9150 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
9152 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
9153 vec_oprnds2.pop ());
9154 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
9155 vec_oprnds3.pop ());
9158 if (!slp_node)
9160 vec_oprnds0.quick_push (vec_cond_lhs);
9161 if (!masked)
9162 vec_oprnds1.quick_push (vec_cond_rhs);
9163 vec_oprnds2.quick_push (vec_then_clause);
9164 vec_oprnds3.quick_push (vec_else_clause);
9167 /* Arguments are ready. Create the new vector stmt. */
9168 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
9170 vec_then_clause = vec_oprnds2[i];
9171 vec_else_clause = vec_oprnds3[i];
9173 if (masked)
9174 vec_compare = vec_cond_lhs;
9175 else
9177 vec_cond_rhs = vec_oprnds1[i];
9178 if (bitop1 == NOP_EXPR)
9179 vec_compare = build2 (cond_code, vec_cmp_type,
9180 vec_cond_lhs, vec_cond_rhs);
9181 else
9183 new_temp = make_ssa_name (vec_cmp_type);
9184 gassign *new_stmt;
9185 if (bitop1 == BIT_NOT_EXPR)
9186 new_stmt = gimple_build_assign (new_temp, bitop1,
9187 vec_cond_rhs);
9188 else
9189 new_stmt
9190 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9191 vec_cond_rhs);
9192 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9193 if (bitop2 == NOP_EXPR)
9194 vec_compare = new_temp;
9195 else if (bitop2 == BIT_NOT_EXPR)
9197 /* Instead of doing ~x ? y : z do x ? z : y. */
9198 vec_compare = new_temp;
9199 std::swap (vec_then_clause, vec_else_clause);
9201 else
9203 vec_compare = make_ssa_name (vec_cmp_type);
9204 new_stmt
9205 = gimple_build_assign (vec_compare, bitop2,
9206 vec_cond_lhs, new_temp);
9207 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9211 if (reduction_type == EXTRACT_LAST_REDUCTION)
9213 if (!is_gimple_val (vec_compare))
9215 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9216 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9217 vec_compare);
9218 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9219 vec_compare = vec_compare_name;
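/* Illustrative note: .FOLD_EXTRACT_LAST (else, mask, vec) is expected to
   yield the element of VEC at the position of the last set bit in MASK,
   or ELSE when no mask bit is set.  */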
9221 gcall *new_stmt = gimple_build_call_internal
9222 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9223 vec_then_clause);
9224 gimple_call_set_lhs (new_stmt, scalar_dest);
9225 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9226 if (stmt_info->stmt == gsi_stmt (*gsi))
9227 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
9228 else
9230 /* In this case we're moving the definition to later in the
9231 block. That doesn't matter because the only uses of the
9232 lhs are in phi statements. */
9233 gimple_stmt_iterator old_gsi
9234 = gsi_for_stmt (stmt_info->stmt);
9235 gsi_remove (&old_gsi, true);
9236 new_stmt_info
9237 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9240 else
9242 new_temp = make_ssa_name (vec_dest);
9243 gassign *new_stmt
9244 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9245 vec_then_clause, vec_else_clause);
9246 new_stmt_info
9247 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9249 if (slp_node)
9250 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9253 if (slp_node)
9254 continue;
9256 if (j == 0)
9257 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9258 else
9259 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9261 prev_stmt_info = new_stmt_info;
9264 vec_oprnds0.release ();
9265 vec_oprnds1.release ();
9266 vec_oprnds2.release ();
9267 vec_oprnds3.release ();
9269 return true;
9272 /* vectorizable_comparison.
9274 Check if STMT_INFO is comparison expression that can be vectorized.
9275 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9276 comparison, put it in VEC_STMT, and insert it at GSI.
9278 Return true if STMT_INFO is vectorizable in this way. */
9280 static bool
9281 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9282 stmt_vec_info *vec_stmt,
9283 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9285 vec_info *vinfo = stmt_info->vinfo;
9286 tree lhs, rhs1, rhs2;
9287 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9288 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9289 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9290 tree new_temp;
9291 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9292 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9293 int ndts = 2;
9294 poly_uint64 nunits;
9295 int ncopies;
9296 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9297 stmt_vec_info prev_stmt_info = NULL;
9298 int i, j;
9299 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9300 vec<tree> vec_oprnds0 = vNULL;
9301 vec<tree> vec_oprnds1 = vNULL;
9302 tree mask_type;
9303 tree mask;
9305 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9306 return false;
9308 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9309 return false;
9311 mask_type = vectype;
9312 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9314 if (slp_node)
9315 ncopies = 1;
9316 else
9317 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9319 gcc_assert (ncopies >= 1);
9320 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9321 return false;
9323 if (STMT_VINFO_LIVE_P (stmt_info))
9325 if (dump_enabled_p ())
9326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9327 "value used after loop.\n");
9328 return false;
9331 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9332 if (!stmt)
9333 return false;
9335 code = gimple_assign_rhs_code (stmt);
9337 if (TREE_CODE_CLASS (code) != tcc_comparison)
9338 return false;
9340 rhs1 = gimple_assign_rhs1 (stmt);
9341 rhs2 = gimple_assign_rhs2 (stmt);
9343 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9344 return false;
9346 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9347 return false;
9349 if (vectype1 && vectype2
9350 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9351 TYPE_VECTOR_SUBPARTS (vectype2)))
9352 return false;
9354 vectype = vectype1 ? vectype1 : vectype2;
9356 /* Invariant comparison. */
9357 if (!vectype)
9359 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9360 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9361 return false;
9363 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9364 return false;
9366 /* Can't compare mask and non-mask types. */
9367 if (vectype1 && vectype2
9368 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9369 return false;
9371 /* Boolean values may have another representation in vectors
9372 and therefore we prefer bit operations over comparison for
9373 them (which also works for scalar masks). We store opcodes
9374 to use in bitop1 and bitop2. Statement is vectorized as
9375 BITOP2 (rhs1 BITOP1 rhs2) or
9376 rhs1 BITOP2 (BITOP1 rhs2)
9377 depending on bitop1 and bitop2 arity. */
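/* Illustrative example (not from the original source): the mask
   comparison A == B becomes ~(A ^ B), i.e. bitop1 = BIT_XOR_EXPR
   combines the operands and bitop2 = BIT_NOT_EXPR inverts the result.  */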
9378 bool swap_p = false;
9379 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9381 if (code == GT_EXPR)
9383 bitop1 = BIT_NOT_EXPR;
9384 bitop2 = BIT_AND_EXPR;
9386 else if (code == GE_EXPR)
9388 bitop1 = BIT_NOT_EXPR;
9389 bitop2 = BIT_IOR_EXPR;
9391 else if (code == LT_EXPR)
9393 bitop1 = BIT_NOT_EXPR;
9394 bitop2 = BIT_AND_EXPR;
9395 swap_p = true;
9397 else if (code == LE_EXPR)
9399 bitop1 = BIT_NOT_EXPR;
9400 bitop2 = BIT_IOR_EXPR;
9401 swap_p = true;
9403 else
9405 bitop1 = BIT_XOR_EXPR;
9406 if (code == EQ_EXPR)
9407 bitop2 = BIT_NOT_EXPR;
9411 if (!vec_stmt)
9413 if (bitop1 == NOP_EXPR)
9415 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9416 return false;
9418 else
9420 machine_mode mode = TYPE_MODE (vectype);
9421 optab optab;
9423 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9424 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9425 return false;
9427 if (bitop2 != NOP_EXPR)
9429 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9430 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9431 return false;
9435 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9436 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9437 dts, ndts, slp_node, cost_vec);
9438 return true;
9441 /* Transform. */
9442 if (!slp_node)
9444 vec_oprnds0.create (1);
9445 vec_oprnds1.create (1);
9448 /* Handle def. */
9449 lhs = gimple_assign_lhs (stmt);
9450 mask = vect_create_destination_var (lhs, mask_type);
9452 /* Handle cmp expr. */
9453 for (j = 0; j < ncopies; j++)
9455 stmt_vec_info new_stmt_info = NULL;
9456 if (j == 0)
9458 if (slp_node)
9460 auto_vec<tree, 2> ops;
9461 auto_vec<vec<tree>, 2> vec_defs;
9463 ops.safe_push (rhs1);
9464 ops.safe_push (rhs2);
9465 vect_get_slp_defs (ops, slp_node, &vec_defs);
9466 vec_oprnds1 = vec_defs.pop ();
9467 vec_oprnds0 = vec_defs.pop ();
9468 if (swap_p)
9469 std::swap (vec_oprnds0, vec_oprnds1);
9471 else
9473 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
9474 vectype);
9475 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
9476 vectype);
9479 else
9481 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
9482 vec_oprnds0.pop ());
9483 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
9484 vec_oprnds1.pop ());
9487 if (!slp_node)
9489 if (swap_p)
9490 std::swap (vec_rhs1, vec_rhs2);
9491 vec_oprnds0.quick_push (vec_rhs1);
9492 vec_oprnds1.quick_push (vec_rhs2);
9495 /* Arguments are ready. Create the new vector stmt. */
9496 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9498 vec_rhs2 = vec_oprnds1[i];
9500 new_temp = make_ssa_name (mask);
9501 if (bitop1 == NOP_EXPR)
9503 gassign *new_stmt = gimple_build_assign (new_temp, code,
9504 vec_rhs1, vec_rhs2);
9505 new_stmt_info
9506 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9508 else
9510 gassign *new_stmt;
9511 if (bitop1 == BIT_NOT_EXPR)
9512 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9513 else
9514 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9515 vec_rhs2);
9516 new_stmt_info
9517 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9518 if (bitop2 != NOP_EXPR)
9520 tree res = make_ssa_name (mask);
9521 if (bitop2 == BIT_NOT_EXPR)
9522 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9523 else
9524 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9525 new_temp);
9526 new_stmt_info
9527 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9530 if (slp_node)
9531 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9534 if (slp_node)
9535 continue;
9537 if (j == 0)
9538 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9539 else
9540 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9542 prev_stmt_info = new_stmt_info;
9545 vec_oprnds0.release ();
9546 vec_oprnds1.release ();
9548 return true;
9551 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9552 can handle all live statements in the node. Otherwise return true
9553 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9554 GSI and VEC_STMT are as for vectorizable_live_operation. */
9556 static bool
9557 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9558 slp_tree slp_node, stmt_vec_info *vec_stmt,
9559 stmt_vector_for_cost *cost_vec)
9561 if (slp_node)
9563 stmt_vec_info slp_stmt_info;
9564 unsigned int i;
9565 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
9567 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9568 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
9569 vec_stmt, cost_vec))
9570 return false;
9573 else if (STMT_VINFO_LIVE_P (stmt_info)
9574 && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
9575 vec_stmt, cost_vec))
9576 return false;
9578 return true;
9581 /* Make sure the statement is vectorizable. */
9583 opt_result
9584 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
9585 slp_tree node, slp_instance node_instance,
9586 stmt_vector_for_cost *cost_vec)
9588 vec_info *vinfo = stmt_info->vinfo;
9589 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9590 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9591 bool ok;
9592 gimple_seq pattern_def_seq;
9594 if (dump_enabled_p ())
9595 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
9596 stmt_info->stmt);
9598 if (gimple_has_volatile_ops (stmt_info->stmt))
9599 return opt_result::failure_at (stmt_info->stmt,
9600 "not vectorized:"
9601 " stmt has volatile operands: %G\n",
9602 stmt_info->stmt);
9604 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9605 && node == NULL
9606 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9608 gimple_stmt_iterator si;
9610 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9612 stmt_vec_info pattern_def_stmt_info
9613 = vinfo->lookup_stmt (gsi_stmt (si));
9614 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9615 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9617 /* Analyze def stmt of STMT if it's a pattern stmt. */
9618 if (dump_enabled_p ())
9619 dump_printf_loc (MSG_NOTE, vect_location,
9620 "==> examining pattern def statement: %G",
9621 pattern_def_stmt_info->stmt);
9623 opt_result res
9624 = vect_analyze_stmt (pattern_def_stmt_info,
9625 need_to_vectorize, node, node_instance,
9626 cost_vec);
9627 if (!res)
9628 return res;
9633 /* Skip stmts that do not need to be vectorized. In loops this is expected
9634 to include:
9635 - the COND_EXPR which is the loop exit condition
9636 - any LABEL_EXPRs in the loop
9637 - computations that are used only for array indexing or loop control.
9638 In basic blocks we only analyze statements that are a part of some SLP
9639 instance, therefore, all the statements are relevant.
9641 A pattern statement needs to be analyzed instead of the original statement
9642 if the original statement is not relevant. Otherwise, we analyze both
9643 statements. In basic blocks we are called from some SLP instance
9644 traversal, so we do not analyze pattern stmts here; the pattern stmts
9645 are already part of the SLP instance. */
9647 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9648 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9649 && !STMT_VINFO_LIVE_P (stmt_info))
9651 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9652 && pattern_stmt_info
9653 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9654 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9656 /* Analyze PATTERN_STMT instead of the original stmt. */
9657 stmt_info = pattern_stmt_info;
9658 if (dump_enabled_p ())
9659 dump_printf_loc (MSG_NOTE, vect_location,
9660 "==> examining pattern statement: %G",
9661 stmt_info->stmt);
9663 else
9665 if (dump_enabled_p ())
9666 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9668 return opt_result::success ();
9671 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9672 && node == NULL
9673 && pattern_stmt_info
9674 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9675 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9677 /* Analyze PATTERN_STMT too. */
9678 if (dump_enabled_p ())
9679 dump_printf_loc (MSG_NOTE, vect_location,
9680 "==> examining pattern statement: %G",
9681 pattern_stmt_info->stmt);
9683 opt_result res
9684 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
9685 node_instance, cost_vec);
9686 if (!res)
9687 return res;
9690 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9692 case vect_internal_def:
9693 break;
9695 case vect_reduction_def:
9696 case vect_nested_cycle:
9697 gcc_assert (!bb_vinfo
9698 && (relevance == vect_used_in_outer
9699 || relevance == vect_used_in_outer_by_reduction
9700 || relevance == vect_used_by_reduction
9701 || relevance == vect_unused_in_scope
9702 || relevance == vect_used_only_live));
9703 break;
9705 case vect_induction_def:
9706 gcc_assert (!bb_vinfo);
9707 break;
9709 case vect_constant_def:
9710 case vect_external_def:
9711 case vect_unknown_def_type:
9712 default:
9713 gcc_unreachable ();
9716 if (STMT_VINFO_RELEVANT_P (stmt_info))
9718 tree type = gimple_expr_type (stmt_info->stmt);
9719 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
9720 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9721 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9722 || (call && gimple_call_lhs (call) == NULL_TREE));
9723 *need_to_vectorize = true;
9726 if (PURE_SLP_STMT (stmt_info) && !node)
9728 if (dump_enabled_p ())
9729 dump_printf_loc (MSG_NOTE, vect_location,
9730 "handled only by SLP analysis\n");
9731 return opt_result::success ();
9734 ok = true;
9735 if (!bb_vinfo
9736 && (STMT_VINFO_RELEVANT_P (stmt_info)
9737 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9738 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
9739 -mveclibabi= takes preference over library functions with
9740 the simd attribute. */
9741 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9742 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9743 cost_vec)
9744 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
9745 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9746 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
9747 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9748 cost_vec)
9749 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9750 || vectorizable_reduction (stmt_info, NULL, NULL, node,
9751 node_instance, cost_vec)
9752 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
9753 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9754 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9755 cost_vec)
9756 || vectorizable_comparison (stmt_info, NULL, NULL, node,
9757 cost_vec));
9758 else
9760 if (bb_vinfo)
9761 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9762 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9763 cost_vec)
9764 || vectorizable_conversion (stmt_info, NULL, NULL, node,
9765 cost_vec)
9766 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9767 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9768 || vectorizable_assignment (stmt_info, NULL, NULL, node,
9769 cost_vec)
9770 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9771 cost_vec)
9772 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9773 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9774 cost_vec)
9775 || vectorizable_comparison (stmt_info, NULL, NULL, node,
9776 cost_vec));
9779 if (!ok)
9780 return opt_result::failure_at (stmt_info->stmt,
9781 "not vectorized:"
9782 " relevant stmt not supported: %G",
9783 stmt_info->stmt);
9785 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9786 need extra handling, except for vectorizable reductions. */
9787 if (!bb_vinfo
9788 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9789 && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
9790 return opt_result::failure_at (stmt_info->stmt,
9791 "not vectorized:"
9792 " live stmt not supported: %G",
9793 stmt_info->stmt);
9795 return opt_result::success ();
9799 /* Function vect_transform_stmt.
9801 Create a vectorized stmt to replace STMT_INFO, and insert it at BSI. */
9803 bool
9804 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9805 slp_tree slp_node, slp_instance slp_node_instance)
9807 vec_info *vinfo = stmt_info->vinfo;
9808 bool is_store = false;
9809 stmt_vec_info vec_stmt = NULL;
9810 bool done;
9812 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9813 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
9815 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9816 && nested_in_vect_loop_p
9817 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9818 stmt_info));
9820 gimple *stmt = stmt_info->stmt;
9821 switch (STMT_VINFO_TYPE (stmt_info))
9823 case type_demotion_vec_info_type:
9824 case type_promotion_vec_info_type:
9825 case type_conversion_vec_info_type:
9826 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
9827 NULL);
9828 gcc_assert (done);
9829 break;
9831 case induc_vec_info_type:
9832 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
9833 NULL);
9834 gcc_assert (done);
9835 break;
9837 case shift_vec_info_type:
9838 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9839 gcc_assert (done);
9840 break;
9842 case op_vec_info_type:
9843 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
9844 NULL);
9845 gcc_assert (done);
9846 break;
9848 case assignment_vec_info_type:
9849 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
9850 NULL);
9851 gcc_assert (done);
9852 break;
9854 case load_vec_info_type:
9855 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
9856 slp_node_instance, NULL);
9857 gcc_assert (done);
9858 break;
9860 case store_vec_info_type:
9861 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9862 gcc_assert (done);
9863 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9865 /* In case of interleaving, the whole chain is vectorized when the
9866 last store in the chain is reached. Store stmts before the last
9867 one are skipped, and their stmt_vec_info shouldn't be freed
9868 meanwhile. */
9869 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9870 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9871 is_store = true;
9873 else
9874 is_store = true;
9875 break;
9877 case condition_vec_info_type:
9878 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
9879 slp_node, NULL);
9880 gcc_assert (done);
9881 break;
9883 case comparison_vec_info_type:
9884 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
9885 slp_node, NULL);
9886 gcc_assert (done);
9887 break;
9889 case call_vec_info_type:
9890 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9891 stmt = gsi_stmt (*gsi);
9892 break;
9894 case call_simd_clone_vec_info_type:
9895 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
9896 slp_node, NULL);
9897 stmt = gsi_stmt (*gsi);
9898 break;
9900 case reduc_vec_info_type:
9901 done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
9902 slp_node_instance, NULL);
9903 gcc_assert (done);
9904 break;
9906 default:
9907 if (!STMT_VINFO_LIVE_P (stmt_info))
9909 if (dump_enabled_p ())
9910 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9911 "stmt not supported.\n");
9912 gcc_unreachable ();
9916 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9917 This would break hybrid SLP vectorization. */
9918 if (slp_node)
9919 gcc_assert (!vec_stmt
9920 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
9922 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9923 is being vectorized, but outside the immediately enclosing loop. */
9924 if (vec_stmt
9925 && nested_p
9926 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9927 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9928 || STMT_VINFO_RELEVANT (stmt_info) ==
9929 vect_used_in_outer_by_reduction))
9931 struct loop *innerloop = LOOP_VINFO_LOOP (
9932 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9933 imm_use_iterator imm_iter;
9934 use_operand_p use_p;
9935 tree scalar_dest;
9937 if (dump_enabled_p ())
9938 dump_printf_loc (MSG_NOTE, vect_location,
9939 "Record the vdef for outer-loop vectorization.\n");
9941 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9942 (to be used when vectorizing outer-loop stmts that use the DEF of
9943 STMT). */
9944 if (gimple_code (stmt) == GIMPLE_PHI)
9945 scalar_dest = PHI_RESULT (stmt);
9946 else
9947 scalar_dest = gimple_get_lhs (stmt);
9949 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9950 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9952 stmt_vec_info exit_phi_info
9953 = vinfo->lookup_stmt (USE_STMT (use_p));
9954 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9958 /* Handle stmts whose DEF is used outside the loop-nest that is
9959 being vectorized. */
9960 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9962 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
9963 NULL);
9964 gcc_assert (done);
9967 if (vec_stmt)
9968 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9970 return is_store;
9974 /* Remove a group of stores (for SLP or interleaving), free their
9975 stmt_vec_info. */
9977 void
9978 vect_remove_stores (stmt_vec_info first_stmt_info)
9980 vec_info *vinfo = first_stmt_info->vinfo;
9981 stmt_vec_info next_stmt_info = first_stmt_info;
9983 while (next_stmt_info)
9985 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9986 next_stmt_info = vect_orig_stmt (next_stmt_info);
9987 /* Free the attached stmt_vec_info and remove the stmt. */
9988 vinfo->remove_stmt (next_stmt_info);
9989 next_stmt_info = tmp;
9993 /* Function get_vectype_for_scalar_type_and_size.
9995 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9996 by the target. */
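/* Illustrative example (assumed 128-bit vector support): for a 32-bit
   'int' SCALAR_TYPE and SIZE = 16 bytes this returns a 4-element
   integer vector type, provided the target has such a vector mode.  */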
9998 tree
9999 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
10001 tree orig_scalar_type = scalar_type;
10002 scalar_mode inner_mode;
10003 machine_mode simd_mode;
10004 poly_uint64 nunits;
10005 tree vectype;
10007 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
10008 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
10009 return NULL_TREE;
10011 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
10013 /* For vector types of elements whose mode precision doesn't
10014 match their type's precision we use an element type of mode
10015 precision. The vectorization routines will have to make sure
10016 they support the proper result truncation/extension.
10017 We also make sure to build vector types with INTEGER_TYPE
10018 component type only. */
10019 if (INTEGRAL_TYPE_P (scalar_type)
10020 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
10021 || TREE_CODE (scalar_type) != INTEGER_TYPE))
10022 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
10023 TYPE_UNSIGNED (scalar_type));
10025 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10026 When the component mode passes the above test simply use a type
10027 corresponding to that mode. The theory is that any use that
10028 would cause problems with this will disable vectorization anyway. */
10029 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
10030 && !INTEGRAL_TYPE_P (scalar_type))
10031 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
10033 /* We can't build a vector type of elements with alignment bigger than
10034 their size. */
10035 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
10036 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
10037 TYPE_UNSIGNED (scalar_type));
10039 /* If we fell back to using the mode, fail if there was
10040 no scalar type for it. */
10041 if (scalar_type == NULL_TREE)
10042 return NULL_TREE;
10044 /* If no size was supplied use the mode the target prefers. Otherwise
10045 lookup a vector mode of the specified size. */
10046 if (known_eq (size, 0U))
10047 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
10048 else if (!multiple_p (size, nbytes, &nunits)
10049 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
10050 return NULL_TREE;
10051 /* NOTE: nunits == 1 is allowed to support single element vector types. */
10052 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
10053 return NULL_TREE;
10055 vectype = build_vector_type (scalar_type, nunits);
10057 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
10058 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
10059 return NULL_TREE;
10061 /* Re-attach the address-space qualifier if we canonicalized the scalar
10062 type. */
10063 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10064 return build_qualified_type
10065 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10067 return vectype;
10070 poly_uint64 current_vector_size;
10072 /* Function get_vectype_for_scalar_type.
10074 Returns the vector type corresponding to SCALAR_TYPE as supported
10075 by the target. */
10077 tree
10078 get_vectype_for_scalar_type (tree scalar_type)
10080 tree vectype;
10081 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10082 current_vector_size);
10083 if (vectype
10084 && known_eq (current_vector_size, 0U))
10085 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10086 return vectype;
10089 /* Function get_mask_type_for_scalar_type.
10091 Returns the mask type corresponding to a result of comparison
10092 of vectors of specified SCALAR_TYPE as supported by target. */
10094 tree
10095 get_mask_type_for_scalar_type (tree scalar_type)
10097 tree vectype = get_vectype_for_scalar_type (scalar_type);
10099 if (!vectype)
10100 return NULL;
10102 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10103 current_vector_size);
10106 /* Function get_same_sized_vectype
10108 Returns a vector type corresponding to SCALAR_TYPE of size
10109 VECTOR_TYPE if supported by the target. */
10111 tree
10112 get_same_sized_vectype (tree scalar_type, tree vector_type)
10114 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10115 return build_same_sized_truth_vector_type (vector_type);
10117 return get_vectype_for_scalar_type_and_size
10118 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
10121 /* Function vect_is_simple_use.
10123 Input:
10124 VINFO - the vect info of the loop or basic block that is being vectorized.
10125 OPERAND - operand in the loop or bb.
10126 Output:
10127 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
10128 case OPERAND is an SSA_NAME that is defined in the vectorizable region
10129 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
10130 the definition could be anywhere in the function
10131 DT - the type of definition
10133 Returns whether a stmt with OPERAND can be vectorized.
10134 For loops, supportable operands are constants, loop invariants, and operands
10135 that are defined by the current iteration of the loop. Unsupportable
10136 operands are those that are defined by a previous iteration of the loop (as
10137 is the case in reduction/induction computations).
10138 For basic blocks, supportable operands are constants and bb invariants.
10139 For now, operands defined outside the basic block are not supported. */
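/* Illustrative example (not from the original source): in a loop computing
   a[i] = b[i] + x, the loop-invariant x is classified as vect_external_def,
   a literal constant as vect_constant_def, and the SSA name holding the
   loaded value of b[i] as vect_internal_def.  */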
10141 bool
10142 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10143 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
10145 if (def_stmt_info_out)
10146 *def_stmt_info_out = NULL;
10147 if (def_stmt_out)
10148 *def_stmt_out = NULL;
10149 *dt = vect_unknown_def_type;
10151 if (dump_enabled_p ())
10153 dump_printf_loc (MSG_NOTE, vect_location,
10154 "vect_is_simple_use: operand ");
10155 if (TREE_CODE (operand) == SSA_NAME
10156 && !SSA_NAME_IS_DEFAULT_DEF (operand))
10157 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10158 else
10159 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
10162 if (CONSTANT_CLASS_P (operand))
10163 *dt = vect_constant_def;
10164 else if (is_gimple_min_invariant (operand))
10165 *dt = vect_external_def;
10166 else if (TREE_CODE (operand) != SSA_NAME)
10167 *dt = vect_unknown_def_type;
10168 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
10169 *dt = vect_external_def;
10170 else
10172 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10173 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
10174 if (!stmt_vinfo)
10175 *dt = vect_external_def;
10176 else
10178 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
10179 def_stmt = stmt_vinfo->stmt;
10180 switch (gimple_code (def_stmt))
10182 case GIMPLE_PHI:
10183 case GIMPLE_ASSIGN:
10184 case GIMPLE_CALL:
10185 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10186 break;
10187 default:
10188 *dt = vect_unknown_def_type;
10189 break;
10191 if (def_stmt_info_out)
10192 *def_stmt_info_out = stmt_vinfo;
10194 if (def_stmt_out)
10195 *def_stmt_out = def_stmt;
10198 if (dump_enabled_p ())
10200 dump_printf (MSG_NOTE, ", type of def: ");
10201 switch (*dt)
10203 case vect_uninitialized_def:
10204 dump_printf (MSG_NOTE, "uninitialized\n");
10205 break;
10206 case vect_constant_def:
10207 dump_printf (MSG_NOTE, "constant\n");
10208 break;
10209 case vect_external_def:
10210 dump_printf (MSG_NOTE, "external\n");
10211 break;
10212 case vect_internal_def:
10213 dump_printf (MSG_NOTE, "internal\n");
10214 break;
10215 case vect_induction_def:
10216 dump_printf (MSG_NOTE, "induction\n");
10217 break;
10218 case vect_reduction_def:
10219 dump_printf (MSG_NOTE, "reduction\n");
10220 break;
10221 case vect_double_reduction_def:
10222 dump_printf (MSG_NOTE, "double reduction\n");
10223 break;
10224 case vect_nested_cycle:
10225 dump_printf (MSG_NOTE, "nested cycle\n");
10226 break;
10227 case vect_unknown_def_type:
10228 dump_printf (MSG_NOTE, "unknown\n");
10229 break;
10233 if (*dt == vect_unknown_def_type)
10235 if (dump_enabled_p ())
10236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10237 "Unsupported pattern.\n");
10238 return false;
10241 return true;
10244 /* Function vect_is_simple_use.
10246 Same as vect_is_simple_use but also determines the vector operand
10247 type of OPERAND and stores it to *VECTYPE. If the definition of
10248 OPERAND is vect_uninitialized_def, vect_constant_def or
10249 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10250 is responsible to compute the best suited vector type for the
10251 scalar operand. */
10253 bool
10254 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10255 tree *vectype, stmt_vec_info *def_stmt_info_out,
10256 gimple **def_stmt_out)
10258 stmt_vec_info def_stmt_info;
10259 gimple *def_stmt;
10260 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10261 return false;
10263 if (def_stmt_out)
10264 *def_stmt_out = def_stmt;
10265 if (def_stmt_info_out)
10266 *def_stmt_info_out = def_stmt_info;
10268 /* Now get a vector type if the def is internal, otherwise supply
10269 NULL_TREE and leave it up to the caller to figure out a proper
10270 type for the use stmt. */
10271 if (*dt == vect_internal_def
10272 || *dt == vect_induction_def
10273 || *dt == vect_reduction_def
10274 || *dt == vect_double_reduction_def
10275 || *dt == vect_nested_cycle)
10277 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
10278 gcc_assert (*vectype != NULL_TREE);
10279 if (dump_enabled_p ())
10280 dump_printf_loc (MSG_NOTE, vect_location,
10281 "vect_is_simple_use: vectype %T\n", *vectype);
10283 else if (*dt == vect_uninitialized_def
10284 || *dt == vect_constant_def
10285 || *dt == vect_external_def)
10286 *vectype = NULL_TREE;
10287 else
10288 gcc_unreachable ();
10290 return true;
10294 /* Function supportable_widening_operation
10296 Check whether an operation represented by the code CODE is a
10297 widening operation that is supported by the target platform in
10298 vector form (i.e., when operating on arguments of type VECTYPE_IN
10299 producing a result of type VECTYPE_OUT).
10301 Widening operations we currently support are NOP (CONVERT), FLOAT,
10302 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10303 are supported by the target platform either directly (via vector
10304 tree-codes), or via target builtins.
10306 Output:
10307 - CODE1 and CODE2 are codes of vector operations to be used when
10308 vectorizing the operation, if available.
10309 - MULTI_STEP_CVT determines the number of required intermediate steps in
10310 case of multi-step conversion (like char->short->int - in that case
10311 MULTI_STEP_CVT will be 1).
10312 - INTERM_TYPES contains the intermediate type required to perform the
10313 widening operation (short in the above example). */
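/* Illustrative example (assumed single-step case): a WIDEN_MULT_EXPR on
   vectors of 8 chars is typically emitted as a VEC_WIDEN_MULT_LO_EXPR /
   VEC_WIDEN_MULT_HI_EXPR pair, each producing a vector of 4 shorts, with
   MULTI_STEP_CVT left at 0 and INTERM_TYPES left empty.  */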
10315 bool
10316 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
10317 tree vectype_out, tree vectype_in,
10318 enum tree_code *code1, enum tree_code *code2,
10319 int *multi_step_cvt,
10320 vec<tree> *interm_types)
10322 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10323 struct loop *vect_loop = NULL;
10324 machine_mode vec_mode;
10325 enum insn_code icode1, icode2;
10326 optab optab1, optab2;
10327 tree vectype = vectype_in;
10328 tree wide_vectype = vectype_out;
10329 enum tree_code c1, c2;
10330 int i;
10331 tree prev_type, intermediate_type;
10332 machine_mode intermediate_mode, prev_mode;
10333 optab optab3, optab4;
10335 *multi_step_cvt = 0;
10336 if (loop_info)
10337 vect_loop = LOOP_VINFO_LOOP (loop_info);
10339 switch (code)
10341 case WIDEN_MULT_EXPR:
10342 /* The result of a vectorized widening operation usually requires
10343 two vectors (because the widened results do not fit into one vector).
10344 The generated vector results would normally be expected to be
10345 generated in the same order as in the original scalar computation,
10346 i.e. if 8 results are generated in each vector iteration, they are
10347 to be organized as follows:
10348 vect1: [res1,res2,res3,res4],
10349 vect2: [res5,res6,res7,res8].
10351 However, in the special case that the result of the widening
10352 operation is used in a reduction computation only, the order doesn't
10353 matter (because when vectorizing a reduction we change the order of
10354 the computation). Some targets can take advantage of this and
10355 generate more efficient code. For example, targets like Altivec,
10356 that support widen_mult using a sequence of {mult_even,mult_odd}
10357 generate the following vectors:
10358 vect1: [res1,res3,res5,res7],
10359 vect2: [res2,res4,res6,res8].
10361 When vectorizing outer-loops, we execute the inner-loop sequentially
10362 (each vectorized inner-loop iteration contributes to VF outer-loop
10363 iterations in parallel). We therefore don't allow changing the
10364 order of the computation in the inner-loop during outer-loop
10365 vectorization. */
10366 /* TODO: Another case in which order doesn't *really* matter is when we
10367 widen and then contract again, e.g. (short)((int)x * y >> 8).
10368 Normally, pack_trunc performs an even/odd permute, whereas the
10369 repack from an even/odd expansion would be an interleave, which
10370 would be significantly simpler for e.g. AVX2. */
10371 /* In any case, in order to avoid duplicating the code below, recurse
10372 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10373 are properly set up for the caller. If we fail, we'll continue with
10374 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10375 if (vect_loop
10376 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10377 && !nested_in_vect_loop_p (vect_loop, stmt_info)
10378 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10379 stmt_info, vectype_out,
10380 vectype_in, code1, code2,
10381 multi_step_cvt, interm_types))
10383 /* Elements in a vector with vect_used_by_reduction property cannot
10384 be reordered if the use chain with this property does not have the
10385 same operation. One such example is s += a * b, where elements
10386 in a and b cannot be reordered. Here we check if the vector defined
10387 by STMT is only directly used in the reduction statement. */
10388 tree lhs = gimple_assign_lhs (stmt_info->stmt);
10389 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10390 if (use_stmt_info
10391 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10392 return true;
10394 c1 = VEC_WIDEN_MULT_LO_EXPR;
10395 c2 = VEC_WIDEN_MULT_HI_EXPR;
10396 break;
10398 case DOT_PROD_EXPR:
10399 c1 = DOT_PROD_EXPR;
10400 c2 = DOT_PROD_EXPR;
10401 break;
10403 case SAD_EXPR:
10404 c1 = SAD_EXPR;
10405 c2 = SAD_EXPR;
10406 break;
10408 case VEC_WIDEN_MULT_EVEN_EXPR:
10409 /* Support the recursion induced just above. */
10410 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10411 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10412 break;
10414 case WIDEN_LSHIFT_EXPR:
10415 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10416 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10417 break;
10419 CASE_CONVERT:
10420 c1 = VEC_UNPACK_LO_EXPR;
10421 c2 = VEC_UNPACK_HI_EXPR;
10422 break;
10424 case FLOAT_EXPR:
10425 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10426 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10427 break;
10429 case FIX_TRUNC_EXPR:
10430 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10431 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10432 break;
10434 default:
10435 gcc_unreachable ();
10438 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10439 std::swap (c1, c2);
10441 if (code == FIX_TRUNC_EXPR)
10443 /* The signedness is determined from the output operand. */
10444 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10445 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10447 else if (CONVERT_EXPR_CODE_P (code)
10448 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
10449 && VECTOR_BOOLEAN_TYPE_P (vectype)
10450 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
10451 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
10453 /* If the input and result modes are the same, a different optab
10454 is needed where we pass in the number of units in vectype. */
10455 optab1 = vec_unpacks_sbool_lo_optab;
10456 optab2 = vec_unpacks_sbool_hi_optab;
10458 else
10460 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10461 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10464 if (!optab1 || !optab2)
10465 return false;
10467 vec_mode = TYPE_MODE (vectype);
10468 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10469 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10470 return false;
10472 *code1 = c1;
10473 *code2 = c2;
10475 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10476 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10478 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10479 return true;
10480 /* For scalar masks we may have different boolean
10481 vector types having the same QImode. Thus we
10482 add an additional check for the number of elements. */
10483 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10484 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
10485 return true;
10488 /* Check if it's a multi-step conversion that can be done using intermediate
10489 types. */
10491 prev_type = vectype;
10492 prev_mode = vec_mode;
10494 if (!CONVERT_EXPR_CODE_P (code))
10495 return false;
10497 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10498 intermediate steps in the promotion sequence. We try
10499 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10500 not. */
10501 interm_types->create (MAX_INTERM_CVT_STEPS);
10502 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10504 intermediate_mode = insn_data[icode1].operand[0].mode;
10505 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10507 intermediate_type = vect_halve_mask_nunits (prev_type);
10508 if (intermediate_mode != TYPE_MODE (intermediate_type))
10509 return false;
10511 else
10512 intermediate_type
10513 = lang_hooks.types.type_for_mode (intermediate_mode,
10514 TYPE_UNSIGNED (prev_type));
10516 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
10517 && VECTOR_BOOLEAN_TYPE_P (prev_type)
10518 && intermediate_mode == prev_mode
10519 && SCALAR_INT_MODE_P (prev_mode))
10521 /* If the input and result modes are the same, a different optab
10522 is needed where we pass in the number of units in vectype. */
10523 optab3 = vec_unpacks_sbool_lo_optab;
10524 optab4 = vec_unpacks_sbool_hi_optab;
10526 else
10528 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10529 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10532 if (!optab3 || !optab4
10533 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10534 || insn_data[icode1].operand[0].mode != intermediate_mode
10535 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10536 || insn_data[icode2].operand[0].mode != intermediate_mode
10537 || ((icode1 = optab_handler (optab3, intermediate_mode))
10538 == CODE_FOR_nothing)
10539 || ((icode2 = optab_handler (optab4, intermediate_mode))
10540 == CODE_FOR_nothing))
10541 break;
10543 interm_types->quick_push (intermediate_type);
10544 (*multi_step_cvt)++;
10546 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10547 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10549 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10550 return true;
10551 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10552 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
10553 return true;
10556 prev_type = intermediate_type;
10557 prev_mode = intermediate_mode;
10560 interm_types->release ();
10561 return false;
10565 /* Function supportable_narrowing_operation
10567 Check whether an operation represented by the code CODE is a
10568 narrowing operation that is supported by the target platform in
10569 vector form (i.e., when operating on arguments of type VECTYPE_IN
10570 and producing a result of type VECTYPE_OUT).
10572 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10573 and FLOAT. This function checks if these operations are supported by
10574 the target platform directly via vector tree-codes.
10576 Output:
10577 - CODE1 is the code of a vector operation to be used when
10578 vectorizing the operation, if available.
10579 - MULTI_STEP_CVT determines the number of required intermediate steps in
10580 case of multi-step conversion (like int->short->char - in that case
10581 MULTI_STEP_CVT will be 1).
10582 - INTERM_TYPES contains the intermediate type required to perform the
10583 narrowing operation (short in the above example). */
10585 bool
10586 supportable_narrowing_operation (enum tree_code code,
10587 tree vectype_out, tree vectype_in,
10588 enum tree_code *code1, int *multi_step_cvt,
10589 vec<tree> *interm_types)
10591 machine_mode vec_mode;
10592 enum insn_code icode1;
10593 optab optab1, interm_optab;
10594 tree vectype = vectype_in;
10595 tree narrow_vectype = vectype_out;
10596 enum tree_code c1;
10597 tree intermediate_type, prev_type;
10598 machine_mode intermediate_mode, prev_mode;
10599 int i;
10600 bool uns;
10602 *multi_step_cvt = 0;
10603 switch (code)
10605 CASE_CONVERT:
10606 c1 = VEC_PACK_TRUNC_EXPR;
10607 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
10608 && VECTOR_BOOLEAN_TYPE_P (vectype)
10609 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
10610 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
10611 optab1 = vec_pack_sbool_trunc_optab;
10612 else
10613 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10614 break;
10616 case FIX_TRUNC_EXPR:
10617 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10618 /* The signedness is determined from the output operand. */
10619 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10620 break;
10622 case FLOAT_EXPR:
10623 c1 = VEC_PACK_FLOAT_EXPR;
10624 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10625 break;
10627 default:
10628 gcc_unreachable ();
10631 if (!optab1)
10632 return false;
10634 vec_mode = TYPE_MODE (vectype);
10635 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10636 return false;
10638 *code1 = c1;
10640 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10642 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10643 return true;
10644 /* For scalar masks we may have different boolean
10645 vector types having the same QImode. Thus we
10646 add an additional check for the number of elements. */
10647 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10648 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
10649 return true;
10652 if (code == FLOAT_EXPR)
10653 return false;
10655 /* Check if it's a multi-step conversion that can be done using intermediate
10656 types. */
10657 prev_mode = vec_mode;
10658 prev_type = vectype;
10659 if (code == FIX_TRUNC_EXPR)
10660 uns = TYPE_UNSIGNED (vectype_out);
10661 else
10662 uns = TYPE_UNSIGNED (vectype);
10664 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10665 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10666 costly than signed. */
10667 if (code == FIX_TRUNC_EXPR && uns)
10669 enum insn_code icode2;
10671 intermediate_type
10672 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10673 interm_optab
10674 = optab_for_tree_code (c1, intermediate_type, optab_default);
10675 if (interm_optab != unknown_optab
10676 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10677 && insn_data[icode1].operand[0].mode
10678 == insn_data[icode2].operand[0].mode)
10680 uns = false;
10681 optab1 = interm_optab;
10682 icode1 = icode2;
10686 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10687 intermediate steps in the narrowing sequence. We try
10688 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10689 interm_types->create (MAX_INTERM_CVT_STEPS);
10690 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10692 intermediate_mode = insn_data[icode1].operand[0].mode;
10693 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10695 intermediate_type = vect_double_mask_nunits (prev_type);
10696 if (intermediate_mode != TYPE_MODE (intermediate_type))
10697 return false;
10699 else
10700 intermediate_type
10701 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10702 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
10703 && VECTOR_BOOLEAN_TYPE_P (prev_type)
10704 && intermediate_mode == prev_mode
10705 && SCALAR_INT_MODE_P (prev_mode))
10706 interm_optab = vec_pack_sbool_trunc_optab;
10707 else
10708 interm_optab
10709 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10710 optab_default);
10711 if (!interm_optab
10712 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10713 || insn_data[icode1].operand[0].mode != intermediate_mode
10714 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10715 == CODE_FOR_nothing))
10716 break;
10718 interm_types->quick_push (intermediate_type);
10719 (*multi_step_cvt)++;
10721 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10723 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10724 return true;
10725 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10726 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
10727 return true;
10730 prev_mode = intermediate_mode;
10731 prev_type = intermediate_type;
10732 optab1 = interm_optab;
10735 interm_types->release ();
10736 return false;
10739 /* Generate and return a statement that sets vector mask MASK such that
10740 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
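/* Illustrative example: with START_INDEX = 6, END_INDEX = 8 and a
   4-element mask the result is { 1, 1, 0, 0 }.  */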
10742 gcall *
10743 vect_gen_while (tree mask, tree start_index, tree end_index)
10745 tree cmp_type = TREE_TYPE (start_index);
10746 tree mask_type = TREE_TYPE (mask);
10747 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10748 cmp_type, mask_type,
10749 OPTIMIZE_FOR_SPEED));
10750 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10751 start_index, end_index,
10752 build_zero_cst (mask_type));
10753 gimple_call_set_lhs (call, mask);
10754 return call;
10757 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10758 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10760 tree
10761 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10762 tree end_index)
10764 tree tmp = make_ssa_name (mask_type);
10765 gcall *call = vect_gen_while (tmp, start_index, end_index);
10766 gimple_seq_add_stmt (seq, call);
10767 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10770 /* Try to compute the vector types required to vectorize STMT_INFO,
10771 returning true on success and false if vectorization isn't possible.
10773 On success:
10775 - Set *STMT_VECTYPE_OUT to:
10776 - NULL_TREE if the statement doesn't need to be vectorized;
10777 - boolean_type_node if the statement is a boolean operation whose
10778 vector type can only be determined once all the other vector types
10779 are known; and
10780 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10782 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10783 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10784 statement does not help to determine the overall number of units. */
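/* Illustrative example (not from the original source): for a conversion
   statement such as int_x = (int) short_y, *STMT_VECTYPE_OUT is based on
   the 'int' result type while *NUNITS_VECTYPE_OUT is based on the smaller
   'short' type, which has more units per vector.  */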
10786 opt_result
10787 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10788 tree *stmt_vectype_out,
10789 tree *nunits_vectype_out)
10791 gimple *stmt = stmt_info->stmt;
10793 *stmt_vectype_out = NULL_TREE;
10794 *nunits_vectype_out = NULL_TREE;
10796 if (gimple_get_lhs (stmt) == NULL_TREE
10797 /* MASK_STORE has no lhs, but is ok. */
10798 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10800 if (is_a <gcall *> (stmt))
10802 /* Ignore calls with no lhs. These must be calls to
10803 #pragma omp simd functions, and the vectorization factor
10804 they really need can't be determined until
10805 vectorizable_simd_clone_call. */
10806 if (dump_enabled_p ())
10807 dump_printf_loc (MSG_NOTE, vect_location,
10808 "defer to SIMD clone analysis.\n");
10809 return opt_result::success ();
10812 return opt_result::failure_at (stmt,
10813 "not vectorized: irregular stmt.%G", stmt);
10816 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10817 return opt_result::failure_at (stmt,
10818 "not vectorized: vector stmt in loop:%G",
10819 stmt);

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (STMT_VINFO_VECTYPE (stmt_info))
    *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
  else
    {
      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      /* Pure bool ops don't participate in number-of-units computation.
	 For comparisons use the types being compared.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
	  && is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) != COND_EXPR)
	{
	  *stmt_vectype_out = boolean_type_node;

	  tree rhs1 = gimple_assign_rhs1 (stmt);
	  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
	      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	    scalar_type = TREE_TYPE (rhs1);
	  else
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "pure bool operation.\n");
	      return opt_result::success ();
	    }
	}

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "get vectype for scalar type: %T\n", scalar_type);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (!*stmt_vectype_out)
	*stmt_vectype_out = vectype;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype;
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    nunits_vectype = vectype;
  else
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      if (*stmt_vectype_out != boolean_type_node)
	{
	  HOST_WIDE_INT dummy;
	  scalar_type = vect_get_smallest_scalar_type (stmt_info,
						       &dummy, &dummy);
	}
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "get vectype for scalar type: %T\n", scalar_type);
      nunits_vectype = get_vectype_for_scalar_type (scalar_type);
    }
  if (!nunits_vectype)
    return opt_result::failure_at (stmt,
				   "not vectorized: unsupported data-type %T\n",
				   scalar_type);

  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
    return opt_result::failure_at (stmt,
				   "not vectorized: different sized vector "
				   "types in statement, %T and %T\n",
				   vectype, nunits_vectype);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
		       nunits_vectype);
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}
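
/* Example of the two outputs above (illustrative only; the exact modes
   depend on the target, a 128-bit vector ISA is assumed here): for a
   widening assignment such as

     int_var = (int) short_var;

   *STMT_VECTYPE_OUT is the V4SI type of the int result, while the
   smallest scalar type is "short", so *NUNITS_VECTYPE_OUT is V8HI and
   the statement asks for at least 8 units per vector iteration.  */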

/* Try to determine the correct vector type for STMT_INFO, which is a
   statement that produces a scalar boolean result.  Return the vector
   type on success, otherwise return NULL_TREE.  */

opt_tree
vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
{
  gimple *stmt = stmt_info->stmt;
  tree mask_type = NULL;
  tree vectype, scalar_type;

  if (is_gimple_assign (stmt)
      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
    {
      scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
      mask_type = get_mask_type_for_scalar_type (scalar_type);

      if (!mask_type)
	return opt_tree::failure_at (stmt,
				     "not vectorized: unsupported mask\n");
    }
  else
    {
      tree rhs;
      ssa_op_iter iter;
      enum vect_def_type dt;

      FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
	{
	  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
	    return opt_tree::failure_at (stmt,
					 "not vectorized: can't compute mask"
					 " type for statement, %G", stmt);

	  /* No vectype probably means an external definition.  Allow it,
	     in case another operand lets us determine the mask type.  */
	  if (!vectype)
	    continue;

	  if (!mask_type)
	    mask_type = vectype;
	  else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
			     TYPE_VECTOR_SUBPARTS (vectype)))
	    return opt_tree::failure_at (stmt,
					 "not vectorized: different sized mask"
					 " types in statement, %T and %T\n",
					 mask_type, vectype);
	  else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
		   != VECTOR_BOOLEAN_TYPE_P (vectype))
	    return opt_tree::failure_at (stmt,
					 "not vectorized: mixed mask and "
					 "nonmask vector types in statement, "
					 "%T and %T\n",
					 mask_type, vectype);
	}

      /* We may compare a boolean value that was loaded as a vector of
	 integers.  Fix MASK_TYPE in that case.  */
      if (mask_type
	  && !VECTOR_BOOLEAN_TYPE_P (mask_type)
	  && gimple_code (stmt) == GIMPLE_ASSIGN
	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
	mask_type = build_same_sized_truth_vector_type (mask_type);
    }

  /* No mask_type should mean a loop-invariant predicate.
     This is probably a subject for optimization in if-conversion.  */
  if (!mask_type)
    return opt_tree::failure_at (stmt,
				 "not vectorized: can't compute mask type "
				 "for statement: %G", stmt);

  return opt_tree::success (mask_type);
}
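
/* Illustrative example, not from the original source (the exact mask
   modes are target-dependent): for a scalar boolean definition such as

     flag_3 = a_1 > b_2;

   where A_1 and B_2 are "int"s vectorized as V4SI, the comparison branch
   above returns get_mask_type_for_scalar_type's result for "int": a
   4-lane boolean vector type, represented as an integer vector of 0/-1
   values on targets without mask registers, or as a dedicated predicate
   mode (e.g. VNx4BI on SVE) where one exists.  */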