gcc/tree-vect-stmts.c (blob 3575f25241f3550e63d783a19bb3cfb237a41760)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
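/* For example, a caller costing NCOPIES copies of a plain vector statement
   in the loop body does (illustrative, mirroring the calls later in this
   file):

     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);

   The pushed entries are either fed to the target cost model directly or
   replayed later once the final vectorization scheme is known.  */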
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
158 tree array_ref;
159 gimple *new_stmt;
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
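/* Together, create_vector_array, write_vector_array and read_vector_array
   implement the array-of-vectors form used for load/store-lanes groups:
   a grouped store writes each vector into the array with write_vector_array
   and then passes the array to an IFN_STORE_LANES call, while a grouped
   load takes the IFN_LOAD_LANES result array and extracts each vector with
   read_vector_array.  (Illustrative summary; the emitting code is in the
   load/store vectorization routines below.)  */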
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 tree mem_ref;
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
249 worklist->safe_push (stmt_info);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
261 tree op;
262 ssa_op_iter iter;
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
347 *live_p = true;
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
361 return (*live_p || *relevant);
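/* For instance, a store a[i] = x has a vdef and is therefore marked
   vect_used_in_scope, while a computation whose only uses are loop-closed
   exit PHIs is marked live and, unless it is invariant, vect_used_only_live.  */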
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
373 tree operand;
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
381 /* STMT has a data_ref. FORNOW this means that it is one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
413 return false;
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
422 if (operand == use)
423 return true;
425 return false;
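/* Example: for a masked store

     .MASK_STORE (ptr, align, mask, val)

   both MASK and VAL are non-indexing uses (see internal_fn_mask_index and
   internal_fn_stored_value_index above), whereas a USE that only feeds the
   address PTR does not by itself make its defining statement relevant.  */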
430 /* Function process_use.
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
474 if (!dstmt_vinfo)
475 return opt_result::success ();
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
654 use_operand_p use_p;
655 ssa_op_iter iter;
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
706 default:
707 break;
710 if (is_pattern_stmt_p (stmt_vinfo))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
734 for (; i < gimple_num_ops (assign); i++)
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
749 for (i = 0; i < gimple_call_num_args (call); i++)
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
781 if (fatal)
782 *fatal = false;
783 return res;
786 } /* while worklist */
788 return opt_result::success ();
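/* Relating this back to the example in the function comment: assuming T1
   feeds a relevant computation, stmt 2 reaches the worklist, but when its
   uses are processed exist_non_indexing_operands_for_use_p reports that T0
   is used only as an array index, so stmt 1 is never marked relevant,
   which is exactly the behaviour the comment describes.  */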
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
806 int inside_cost = 0, prologue_cost = 0;
808 gcc_assert (cost_vec != NULL);
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. */
841 static void
842 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
843 enum vect_def_type *dt,
844 unsigned int ncopies, int pwr,
845 stmt_vector_for_cost *cost_vec)
847 int i;
848 int inside_cost = 0, prologue_cost = 0;
850 for (i = 0; i < pwr + 1; i++)
852 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
853 stmt_info, 0, vect_body);
854 ncopies *= 2;
857 /* FORNOW: Assuming maximum 2 args per stmt. */
858 for (i = 0; i < 2; i++)
859 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
861 stmt_info, 0, vect_prologue);
863 if (dump_enabled_p ())
864 dump_printf_loc (MSG_NOTE, vect_location,
865 "vect_model_promotion_demotion_cost: inside_cost = %d, "
866 "prologue_cost = %d .\n", inside_cost, prologue_cost);
869 /* Returns true if the current function returns DECL. */
871 static bool
872 cfun_returns (tree decl)
874 edge_iterator ei;
875 edge e;
876 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
878 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
879 if (!ret)
880 continue;
881 if (gimple_return_retval (ret) == decl)
882 return true;
883 /* We often end up with an aggregate copy to the result decl,
884 handle that case as well. First skip intermediate clobbers
885 though. */
886 gimple *def = ret;
887 do
889 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
891 while (gimple_clobber_p (def));
892 if (is_a <gassign *> (def)
893 && gimple_assign_lhs (def) == gimple_return_retval (ret)
894 && gimple_assign_rhs1 (def) == decl)
895 return true;
897 return false;
900 /* Function vect_model_store_cost
902 Models cost for stores. In the case of grouped accesses, one access
903 has the overhead of the grouped access attributed to it. */
905 static void
906 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
907 vect_memory_access_type memory_access_type,
908 vec_load_store_type vls_type, slp_tree slp_node,
909 stmt_vector_for_cost *cost_vec)
911 unsigned int inside_cost = 0, prologue_cost = 0;
912 stmt_vec_info first_stmt_info = stmt_info;
913 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
915 /* ??? Somehow we need to fix this at the callers. */
916 if (slp_node)
917 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
919 if (vls_type == VLS_STORE_INVARIANT)
921 if (!slp_node)
922 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
923 stmt_info, 0, vect_prologue);
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node && grouped_access_p)
929 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p = (first_stmt_info == stmt_info);
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (first_stmt_p
941 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
943 /* Uses high and low interleave or shuffle operations for each
944 needed permute. */
945 int group_size = DR_GROUP_SIZE (first_stmt_info);
946 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
948 stmt_info, 0, vect_body);
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: strided group_size = %d .\n",
953 group_size);
956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957 /* Costs of the stores. */
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_GATHER_SCATTER)
961 /* N scalar stores plus extracting the elements. */
962 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
963 inside_cost += record_stmt_cost (cost_vec,
964 ncopies * assumed_nunits,
965 scalar_store, stmt_info, 0, vect_body);
967 else
968 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
970 if (memory_access_type == VMAT_ELEMENTWISE
971 || memory_access_type == VMAT_STRIDED_SLP)
973 /* N scalar stores plus extracting the elements. */
974 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
975 inside_cost += record_stmt_cost (cost_vec,
976 ncopies * assumed_nunits,
977 vec_to_scalar, stmt_info, 0, vect_body);
980 /* When vectorizing a store into the function result assign
981 a penalty if the function returns in a multi-register location.
982 In this case we assume we'll end up with having to spill the
983 vector result and do piecewise loads as a conservative estimate. */
984 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
985 if (base
986 && (TREE_CODE (base) == RESULT_DECL
987 || (DECL_P (base) && cfun_returns (base)))
988 && !aggregate_value_p (base, cfun->decl))
990 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
991 /* ??? Handle PARALLEL in some way. */
992 if (REG_P (reg))
994 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
995 /* Assume that a single reg-reg move is possible and cheap,
996 do not account for vector to gp register move cost. */
997 if (nregs > 1)
999 /* Spill. */
1000 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1001 vector_store,
1002 stmt_info, 0, vect_epilogue);
1003 /* Loads. */
1004 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1005 scalar_load,
1006 stmt_info, 0, vect_epilogue);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: inside_cost = %d, "
1014 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1018 /* Calculate cost of DR's memory access. */
1019 void
1020 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1021 unsigned int *inside_cost,
1022 stmt_vector_for_cost *body_cost_vec)
1024 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1025 int alignment_support_scheme
1026 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1028 switch (alignment_support_scheme)
1030 case dr_aligned:
1032 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1033 vector_store, stmt_info, 0,
1034 vect_body);
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE, vect_location,
1038 "vect_model_store_cost: aligned.\n");
1039 break;
1042 case dr_unaligned_supported:
1044 /* Here, we assign an additional cost for the unaligned store. */
1045 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1046 unaligned_store, stmt_info,
1047 DR_MISALIGNMENT (dr_info),
1048 vect_body);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "vect_model_store_cost: unaligned supported by "
1052 "hardware.\n");
1053 break;
1056 case dr_unaligned_unsupported:
1058 *inside_cost = VECT_MAX_COST;
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1062 "vect_model_store_cost: unsupported access.\n");
1063 break;
1066 default:
1067 gcc_unreachable ();
1072 /* Function vect_model_load_cost
1074 Models cost for loads. In the case of grouped accesses, one access has
1075 the overhead of the grouped access attributed to it. Since unaligned
1076 accesses are supported for loads, we also account for the costs of the
1077 access scheme chosen. */
1079 static void
1080 vect_model_load_cost (vec_info *vinfo,
1081 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1082 vect_memory_access_type memory_access_type,
1083 slp_tree slp_node,
1084 stmt_vector_for_cost *cost_vec)
1086 unsigned int inside_cost = 0, prologue_cost = 0;
1087 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1089 gcc_assert (cost_vec);
1091 /* ??? Somehow we need to fix this at the callers. */
1092 if (slp_node)
1093 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1095 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1097 /* If the load is permuted then the alignment is determined by
1098 the first group element not by the first scalar stmt DR. */
1099 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1100 /* Record the cost for the permutation. */
1101 unsigned n_perms;
1102 unsigned assumed_nunits
1103 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1104 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1105 vf, true, &n_perms);
1106 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1107 first_stmt_info, 0, vect_body);
1108 /* And adjust the number of loads performed. This handles
1109 redundancies as well as loads that are later dead. */
1110 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1111 bitmap_clear (perm);
1112 for (unsigned i = 0;
1113 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1114 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1115 ncopies = 0;
1116 bool load_seen = false;
1117 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1119 if (i % assumed_nunits == 0)
1121 if (load_seen)
1122 ncopies++;
1123 load_seen = false;
1125 if (bitmap_bit_p (perm, i))
1126 load_seen = true;
1128 if (load_seen)
1129 ncopies++;
1130 gcc_assert (ncopies
1131 <= (DR_GROUP_SIZE (first_stmt_info)
1132 - DR_GROUP_GAP (first_stmt_info)
1133 + assumed_nunits - 1) / assumed_nunits);
1136 /* Grouped loads read all elements in the group at once,
1137 so we want the DR for the first statement. */
1138 stmt_vec_info first_stmt_info = stmt_info;
1139 if (!slp_node && grouped_access_p)
1140 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1142 /* True if we should include any once-per-group costs as well as
1143 the cost of the statement itself. For SLP we only get called
1144 once per group anyhow. */
1145 bool first_stmt_p = (first_stmt_info == stmt_info);
1147 /* We assume that the cost of a single load-lanes instruction is
1148 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1149 access is instead being provided by a load-and-permute operation,
1150 include the cost of the permutes. */
1151 if (first_stmt_p
1152 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1154 /* Uses even and odd extract operations or shuffle operations
1155 for each needed permute. */
1156 int group_size = DR_GROUP_SIZE (first_stmt_info);
1157 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1158 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1159 stmt_info, 0, vect_body);
1161 if (dump_enabled_p ())
1162 dump_printf_loc (MSG_NOTE, vect_location,
1163 "vect_model_load_cost: strided group_size = %d .\n",
1164 group_size);
1167 /* The loads themselves. */
1168 if (memory_access_type == VMAT_ELEMENTWISE
1169 || memory_access_type == VMAT_GATHER_SCATTER)
1171 /* N scalar loads plus gathering them into a vector. */
1172 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1173 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1174 inside_cost += record_stmt_cost (cost_vec,
1175 ncopies * assumed_nunits,
1176 scalar_load, stmt_info, 0, vect_body);
1178 else
1179 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1180 &inside_cost, &prologue_cost,
1181 cost_vec, cost_vec, true);
1182 if (memory_access_type == VMAT_ELEMENTWISE
1183 || memory_access_type == VMAT_STRIDED_SLP)
1184 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1185 stmt_info, 0, vect_body);
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: inside_cost = %d, "
1190 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1194 /* Calculate cost of DR's memory access. */
1195 void
1196 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1197 bool add_realign_cost, unsigned int *inside_cost,
1198 unsigned int *prologue_cost,
1199 stmt_vector_for_cost *prologue_cost_vec,
1200 stmt_vector_for_cost *body_cost_vec,
1201 bool record_prologue_costs)
1203 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1204 int alignment_support_scheme
1205 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1207 switch (alignment_support_scheme)
1209 case dr_aligned:
1211 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1212 stmt_info, 0, vect_body);
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_NOTE, vect_location,
1216 "vect_model_load_cost: aligned.\n");
1218 break;
1220 case dr_unaligned_supported:
1222 /* Here, we assign an additional cost for the unaligned load. */
1223 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1224 unaligned_load, stmt_info,
1225 DR_MISALIGNMENT (dr_info),
1226 vect_body);
1228 if (dump_enabled_p ())
1229 dump_printf_loc (MSG_NOTE, vect_location,
1230 "vect_model_load_cost: unaligned supported by "
1231 "hardware.\n");
1233 break;
1235 case dr_explicit_realign:
1237 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1238 vector_load, stmt_info, 0, vect_body);
1239 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1240 vec_perm, stmt_info, 0, vect_body);
1242 /* FIXME: If the misalignment remains fixed across the iterations of
1243 the containing loop, the following cost should be added to the
1244 prologue costs. */
1245 if (targetm.vectorize.builtin_mask_for_load)
1246 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1247 stmt_info, 0, vect_body);
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: explicit realign\n");
1253 break;
1255 case dr_explicit_realign_optimized:
1257 if (dump_enabled_p ())
1258 dump_printf_loc (MSG_NOTE, vect_location,
1259 "vect_model_load_cost: unaligned software "
1260 "pipelined.\n");
1262 /* Unaligned software pipeline has a load of an address, an initial
1263 load, and possibly a mask operation to "prime" the loop. However,
1264 if this is an access in a group of loads, which provide grouped
1265 access, then the above cost should only be considered for one
1266 access in the group. Inside the loop, there is a load op
1267 and a realignment op. */
1269 if (add_realign_cost && record_prologue_costs)
1271 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1272 vector_stmt, stmt_info,
1273 0, vect_prologue);
1274 if (targetm.vectorize.builtin_mask_for_load)
1275 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1276 vector_stmt, stmt_info,
1277 0, vect_prologue);
1280 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1281 stmt_info, 0, vect_body);
1282 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1283 stmt_info, 0, vect_body);
1285 if (dump_enabled_p ())
1286 dump_printf_loc (MSG_NOTE, vect_location,
1287 "vect_model_load_cost: explicit realign optimized"
1288 "\n");
1290 break;
1293 case dr_unaligned_unsupported:
1295 *inside_cost = VECT_MAX_COST;
1297 if (dump_enabled_p ())
1298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1299 "vect_model_load_cost: unsupported access.\n");
1300 break;
1303 default:
1304 gcc_unreachable ();
1308 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1309 the loop preheader for the vectorized stmt STMT_VINFO. */
1311 static void
1312 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1313 gimple_stmt_iterator *gsi)
1315 if (gsi)
1316 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1317 else
1318 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "created new init_stmt: %G", new_stmt);
1325 /* Function vect_init_vector.
1327 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1328 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1329 vector type a vector with all elements equal to VAL is created first.
1330 Place the initialization at GSI if it is not NULL. Otherwise, place the
1331 initialization at the loop preheader.
1332 Return the DEF of INIT_STMT.
1333 It will be used in the vectorization of STMT_INFO. */
1335 tree
1336 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1337 gimple_stmt_iterator *gsi)
1339 gimple *init_stmt;
1340 tree new_temp;
1342 /* We abuse this function to push a value to an SSA name with initial 'val'. */
1343 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1345 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1346 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1348 /* Scalar boolean value should be transformed into
1349 all zeros or all ones value before building a vector. */
1350 if (VECTOR_BOOLEAN_TYPE_P (type))
1352 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1353 tree false_val = build_zero_cst (TREE_TYPE (type));
1355 if (CONSTANT_CLASS_P (val))
1356 val = integer_zerop (val) ? false_val : true_val;
1357 else
1359 new_temp = make_ssa_name (TREE_TYPE (type));
1360 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1361 val, true_val, false_val);
1362 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1363 val = new_temp;
1366 else
1368 gimple_seq stmts = NULL;
1369 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1370 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1371 TREE_TYPE (type), val);
1372 else
1373 /* ??? Condition vectorization expects us to do
1374 promotion of invariant/external defs. */
1375 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1376 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1377 !gsi_end_p (gsi2); )
1379 init_stmt = gsi_stmt (gsi2);
1380 gsi_remove (&gsi2, false);
1381 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1385 val = build_vector_from_val (type, val);
1388 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1389 init_stmt = gimple_build_assign (new_temp, val);
1390 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391 return new_temp;
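/* Typical use, as in vect_get_vec_defs_for_operand below:

     tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);

   With a null GSI the init statement is inserted on loop entry, so the
   same vector def can be reused for all NCOPIES copies.  */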
1395 /* Function vect_get_vec_defs_for_operand.
1397 OP is an operand in STMT_VINFO. This function returns a vector of
1398 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1400 In the case that OP is an SSA_NAME which is defined in the loop, then
1401 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1403 In case OP is an invariant or constant, a new stmt that creates a vector def
1404 needs to be introduced. VECTYPE may be used to specify a required type for
1405 vector invariant. */
1407 void
1408 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1409 unsigned ncopies,
1410 tree op, vec<tree> *vec_oprnds, tree vectype)
1412 gimple *def_stmt;
1413 enum vect_def_type dt;
1414 bool is_simple_use;
1415 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1417 if (dump_enabled_p ())
1418 dump_printf_loc (MSG_NOTE, vect_location,
1419 "vect_get_vec_defs_for_operand: %T\n", op);
1421 stmt_vec_info def_stmt_info;
1422 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1423 &def_stmt_info, &def_stmt);
1424 gcc_assert (is_simple_use);
1425 if (def_stmt && dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1428 vec_oprnds->create (ncopies);
1429 if (dt == vect_constant_def || dt == vect_external_def)
1431 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1432 tree vector_type;
1434 if (vectype)
1435 vector_type = vectype;
1436 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1437 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1438 vector_type = truth_type_for (stmt_vectype);
1439 else
1440 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1442 gcc_assert (vector_type);
1443 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1444 while (ncopies--)
1445 vec_oprnds->quick_push (vop);
1447 else
1449 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1450 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1451 for (unsigned i = 0; i < ncopies; ++i)
1452 vec_oprnds->quick_push (gimple_get_lhs
1453 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1458 /* Get vectorized definitions for OP0 and OP1. */
1460 void
1461 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1462 unsigned ncopies,
1463 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1464 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1465 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1466 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1468 if (slp_node)
1470 if (op0)
1471 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1472 if (op1)
1473 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1474 if (op2)
1475 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1476 if (op3)
1477 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1479 else
1481 if (op0)
1482 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1483 op0, vec_oprnds0, vectype0);
1484 if (op1)
1485 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1486 op1, vec_oprnds1, vectype1);
1487 if (op2)
1488 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1489 op2, vec_oprnds2, vectype2);
1490 if (op3)
1491 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1492 op3, vec_oprnds3, vectype3);
1496 void
1497 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1498 unsigned ncopies,
1499 tree op0, vec<tree> *vec_oprnds0,
1500 tree op1, vec<tree> *vec_oprnds1,
1501 tree op2, vec<tree> *vec_oprnds2,
1502 tree op3, vec<tree> *vec_oprnds3)
1504 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1505 op0, vec_oprnds0, NULL_TREE,
1506 op1, vec_oprnds1, NULL_TREE,
1507 op2, vec_oprnds2, NULL_TREE,
1508 op3, vec_oprnds3, NULL_TREE);
1511 /* Helper function called by vect_finish_replace_stmt and
1512 vect_finish_stmt_generation. Set the location of the new
1513 statement and create and return a stmt_vec_info for it. */
1515 static void
1516 vect_finish_stmt_generation_1 (vec_info *,
1517 stmt_vec_info stmt_info, gimple *vec_stmt)
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1522 if (stmt_info)
1524 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1526 /* While EH edges will generally prevent vectorization, stmt might
1527 e.g. be in a must-not-throw region. Ensure newly created stmts
1528 that could throw are part of the same region. */
1529 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1530 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1531 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1533 else
1534 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1537 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1538 which sets the same scalar result as STMT_INFO did. Create and return a
1539 stmt_vec_info for VEC_STMT. */
1541 void
1542 vect_finish_replace_stmt (vec_info *vinfo,
1543 stmt_vec_info stmt_info, gimple *vec_stmt)
1545 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1546 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1548 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1549 gsi_replace (&gsi, vec_stmt, true);
1551 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1554 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1555 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1557 void
1558 vect_finish_stmt_generation (vec_info *vinfo,
1559 stmt_vec_info stmt_info, gimple *vec_stmt,
1560 gimple_stmt_iterator *gsi)
1562 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1564 if (!gsi_end_p (*gsi)
1565 && gimple_has_mem_ops (vec_stmt))
1567 gimple *at_stmt = gsi_stmt (*gsi);
1568 tree vuse = gimple_vuse (at_stmt);
1569 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1571 tree vdef = gimple_vdef (at_stmt);
1572 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1573 gimple_set_modified (vec_stmt, true);
1574 /* If we have an SSA vuse and insert a store, update virtual
1575 SSA form to avoid triggering the renamer. Do so only
1576 if we can easily see all uses - which is what almost always
1577 happens with the way vectorized stmts are inserted. */
1578 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1579 && ((is_gimple_assign (vec_stmt)
1580 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1581 || (is_gimple_call (vec_stmt)
1582 && !(gimple_call_flags (vec_stmt)
1583 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1585 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1586 gimple_set_vdef (vec_stmt, new_vdef);
1587 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1591 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1592 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1595 /* We want to vectorize a call to combined function CFN with function
1596 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1597 as the types of all inputs. Check whether this is possible using
1598 an internal function, returning its code if so or IFN_LAST if not. */
1600 static internal_fn
1601 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1602 tree vectype_out, tree vectype_in)
1604 internal_fn ifn;
1605 if (internal_fn_p (cfn))
1606 ifn = as_internal_fn (cfn);
1607 else
1608 ifn = associated_internal_fn (fndecl);
1609 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1611 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1612 if (info.vectorizable)
1614 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1615 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1616 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1617 OPTIMIZE_FOR_SPEED))
1618 return ifn;
1621 return IFN_LAST;
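/* For example, a call to sqrt in the scalar loop corresponds to CFN_SQRT;
   if the target implements the matching optab for the chosen vector mode,
   direct_internal_fn_supported_p accepts IFN_SQRT and the call can be
   vectorized as an internal-function call.  (Illustrative; the exact set
   of supported functions is target dependent.)  */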
1625 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1626 gimple_stmt_iterator *);
1628 /* Check whether a load or store statement in the loop described by
1629 LOOP_VINFO is possible in a loop using partial vectors. This is
1630 testing whether the vectorizer pass has the appropriate support,
1631 as well as whether the target does.
1633 VLS_TYPE says whether the statement is a load or store and VECTYPE
1634 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1635 says how the load or store is going to be implemented and GROUP_SIZE
1636 is the number of load or store statements in the containing group.
1637 If the access is a gather load or scatter store, GS_INFO describes
1638 its arguments. If the load or store is conditional, SCALAR_MASK is the
1639 condition under which it occurs.
1641 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1642 vectors is not supported, otherwise record the required rgroup control
1643 types. */
1645 static void
1646 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1647 vec_load_store_type vls_type,
1648 int group_size,
1649 vect_memory_access_type
1650 memory_access_type,
1651 gather_scatter_info *gs_info,
1652 tree scalar_mask)
1654 /* Invariant loads need no special support. */
1655 if (memory_access_type == VMAT_INVARIANT)
1656 return;
1658 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1659 machine_mode vecmode = TYPE_MODE (vectype);
1660 bool is_load = (vls_type == VLS_LOAD);
1661 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1663 if (is_load
1664 ? !vect_load_lanes_supported (vectype, group_size, true)
1665 : !vect_store_lanes_supported (vectype, group_size, true))
1667 if (dump_enabled_p ())
1668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1669 "can't operate on partial vectors because"
1670 " the target doesn't have an appropriate"
1671 " load/store-lanes instruction.\n");
1672 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1673 return;
1675 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1676 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1677 return;
1680 if (memory_access_type == VMAT_GATHER_SCATTER)
1682 internal_fn ifn = (is_load
1683 ? IFN_MASK_GATHER_LOAD
1684 : IFN_MASK_SCATTER_STORE);
1685 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1686 gs_info->memory_type,
1687 gs_info->offset_vectype,
1688 gs_info->scale))
1690 if (dump_enabled_p ())
1691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1692 "can't operate on partial vectors because"
1693 " the target doesn't have an appropriate"
1694 " gather load or scatter store instruction.\n");
1695 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1696 return;
1698 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1699 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1700 return;
1703 if (memory_access_type != VMAT_CONTIGUOUS
1704 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1706 /* Element X of the data must come from iteration i * VF + X of the
1707 scalar loop. We need more work to support other mappings. */
1708 if (dump_enabled_p ())
1709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1710 "can't operate on partial vectors because an"
1711 " access isn't contiguous.\n");
1712 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1713 return;
1716 if (!VECTOR_MODE_P (vecmode))
1718 if (dump_enabled_p ())
1719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1720 "can't operate on partial vectors when emulating"
1721 " vector operations.\n");
1722 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1723 return;
1726 /* We might load more scalars than we need for permuting SLP loads.
1727 We checked in get_group_load_store_type that the extra elements
1728 don't leak into a new vector. */
1729 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1731 unsigned int nvectors;
1732 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1733 return nvectors;
1734 gcc_unreachable ();
1737 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1738 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1739 machine_mode mask_mode;
1740 bool using_partial_vectors_p = false;
1741 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1742 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1744 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1745 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1746 using_partial_vectors_p = true;
1749 machine_mode vmode;
1750 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1752 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1753 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1754 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1755 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1756 using_partial_vectors_p = true;
1759 if (!using_partial_vectors_p)
1761 if (dump_enabled_p ())
1762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1763 "can't operate on partial vectors because the"
1764 " target doesn't have the appropriate partial"
1765 " vectorization load or store.\n");
1766 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
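/* Illustrative note, added for exposition (not in the upstream source):
   GET_VALID_NVECTORS above rounds the division up, so with GROUP_SIZE = 3,
   VF = 4 and NUNITS = 8 it yields ceil (3 * 4 / 8) = 2, and two loop masks
   (or lengths) are recorded for this access.  Masks are recorded when the
   target has a mask mode and a maskable load/store for VECMODE, lengths
   when it has a length-controlled load/store.  */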
1770 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1771 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1772 that needs to be applied to all loads and stores in a vectorized loop.
1773 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1775 MASK_TYPE is the type of both masks. If new statements are needed,
1776 insert them before GSI. */
1778 static tree
1779 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1780 gimple_stmt_iterator *gsi)
1782 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1783 if (!loop_mask)
1784 return vec_mask;
1786 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1787 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1788 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1789 vec_mask, loop_mask);
1790 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1791 return and_res;
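/* Worked example, added for exposition (not in the upstream source): with
   LOOP_MASK = { 1, 1, 1, 0 } and VEC_MASK = { 1, 0, 1, 1 } the function
   emits something like

     vec_mask_and_1 = vec_mask & loop_mask;

   whose value is { 1, 0, 1, 0 }, so only lanes that are both active in
   this iteration and selected by the scalar condition are accessed.  */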
1794 /* Determine whether we can use a gather load or scatter store to vectorize
1795 strided load or store STMT_INFO by truncating the current offset to a
1796 smaller width. We need to be able to construct an offset vector:
1798 { 0, X, X*2, X*3, ... }
1800 without loss of precision, where X is STMT_INFO's DR_STEP.
1802 Return true if this is possible, describing the gather load or scatter
1803 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1805 static bool
1806 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1807 loop_vec_info loop_vinfo, bool masked_p,
1808 gather_scatter_info *gs_info)
1810 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1811 data_reference *dr = dr_info->dr;
1812 tree step = DR_STEP (dr);
1813 if (TREE_CODE (step) != INTEGER_CST)
1815 /* ??? Perhaps we could use range information here? */
1816 if (dump_enabled_p ())
1817 dump_printf_loc (MSG_NOTE, vect_location,
1818 "cannot truncate variable step.\n");
1819 return false;
1822 /* Get the number of bits in an element. */
1823 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1824 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1825 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1827 /* Set COUNT to the upper limit on the number of elements - 1.
1828 Start with the maximum vectorization factor. */
1829 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1831 /* Try lowering COUNT to the number of scalar latch iterations. */
1832 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1833 widest_int max_iters;
1834 if (max_loop_iterations (loop, &max_iters)
1835 && max_iters < count)
1836 count = max_iters.to_shwi ();
1838 /* Try scales of 1 and the element size. */
1839 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1840 wi::overflow_type overflow = wi::OVF_NONE;
1841 for (int i = 0; i < 2; ++i)
1843 int scale = scales[i];
1844 widest_int factor;
1845 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1846 continue;
1848 /* Determine the minimum precision of COUNT * STEP / SCALE. */
1849 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1850 if (overflow)
1851 continue;
1852 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1853 unsigned int min_offset_bits = wi::min_precision (range, sign);
1855 /* Find the narrowest viable offset type. */
1856 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1857 tree offset_type = build_nonstandard_integer_type (offset_bits,
1858 sign == UNSIGNED);
1860 /* See whether the target supports the operation with an offset
1861 no narrower than OFFSET_TYPE. */
1862 tree memory_type = TREE_TYPE (DR_REF (dr));
1863 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1864 vectype, memory_type, offset_type, scale,
1865 &gs_info->ifn, &gs_info->offset_vectype))
1866 continue;
1868 gs_info->decl = NULL_TREE;
1869 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1870 but we don't need to store that here. */
1871 gs_info->base = NULL_TREE;
1872 gs_info->element_type = TREE_TYPE (vectype);
1873 gs_info->offset = fold_convert (offset_type, step);
1874 gs_info->offset_dt = vect_constant_def;
1875 gs_info->scale = scale;
1876 gs_info->memory_type = memory_type;
1877 return true;
1880 if (overflow && dump_enabled_p ())
1881 dump_printf_loc (MSG_NOTE, vect_location,
1882 "truncating gather/scatter offset to %d bits"
1883 " might change its value.\n", element_bits);
1885 return false;
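/* Worked example, added for exposition (not in the upstream source):
   with DR_STEP = 4, SCALE = 1 and COUNT = 255 the loop above computes
   RANGE = 255 * 4 = 1020, which needs 10 bits unsigned, so OFFSET_BITS
   becomes 1 << ceil_log2 (10) = 16 and the gather/scatter is queried
   with a 16-bit unsigned offset type carrying offsets 0, 4, 8, ...  */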
1888 /* Return true if we can use gather/scatter internal functions to
1889 vectorize STMT_INFO, which is a grouped or strided load or store.
1890 MASKED_P is true if load or store is conditional. When returning
1891 true, fill in GS_INFO with the information required to perform the
1892 operation. */
1894 static bool
1895 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1896 loop_vec_info loop_vinfo, bool masked_p,
1897 gather_scatter_info *gs_info)
1899 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1900 || gs_info->decl)
1901 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1902 masked_p, gs_info);
1904 tree old_offset_type = TREE_TYPE (gs_info->offset);
1905 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1907 gcc_assert (TYPE_PRECISION (new_offset_type)
1908 >= TYPE_PRECISION (old_offset_type));
1909 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1911 if (dump_enabled_p ())
1912 dump_printf_loc (MSG_NOTE, vect_location,
1913 "using gather/scatter for strided/grouped access,"
1914 " scale = %d\n", gs_info->scale);
1916 return true;
1919 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1920 elements with a known constant step. Return -1 if that step
1921 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1923 static int
1924 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1926 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1927 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1928 size_zero_node);
1931 /* If the target supports a permute mask that reverses the elements in
1932 a vector of type VECTYPE, return that mask, otherwise return null. */
1934 static tree
1935 perm_mask_for_reverse (tree vectype)
1937 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1939 /* The encoding has a single stepped pattern. */
1940 vec_perm_builder sel (nunits, 1, 3);
1941 for (int i = 0; i < 3; ++i)
1942 sel.quick_push (nunits - 1 - i);
1944 vec_perm_indices indices (sel, 1, nunits);
1945 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1946 return NULL_TREE;
1947 return vect_gen_perm_mask_checked (vectype, indices);
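/* Illustrative example, added for exposition (not in the upstream
   source): for V4SI the selector built above is { 3, 2, 1, 0 }, so

     VEC_PERM_EXPR <v, v, { 3, 2, 1, 0 }>

   reverses the element order.  For variable-length vectors the single
   stepped pattern { nunits-1, nunits-2, nunits-3, ... } expresses the
   same reversal without naming a constant length.  */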
1950 /* A subroutine of get_load_store_type, with a subset of the same
1951 arguments. Handle the case where STMT_INFO is a load or store that
1952 accesses consecutive elements with a negative step. */
1954 static vect_memory_access_type
1955 get_negative_load_store_type (vec_info *vinfo,
1956 stmt_vec_info stmt_info, tree vectype,
1957 vec_load_store_type vls_type,
1958 unsigned int ncopies)
1960 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1961 dr_alignment_support alignment_support_scheme;
1963 if (ncopies > 1)
1965 if (dump_enabled_p ())
1966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1967 "multiple types with negative step.\n");
1968 return VMAT_ELEMENTWISE;
1971 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1972 dr_info, false);
1973 if (alignment_support_scheme != dr_aligned
1974 && alignment_support_scheme != dr_unaligned_supported)
1976 if (dump_enabled_p ())
1977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1978 "negative step but alignment required.\n");
1979 return VMAT_ELEMENTWISE;
1982 if (vls_type == VLS_STORE_INVARIANT)
1984 if (dump_enabled_p ())
1985 dump_printf_loc (MSG_NOTE, vect_location,
1986 "negative step with invariant source;"
1987 " no permute needed.\n");
1988 return VMAT_CONTIGUOUS_DOWN;
1991 if (!perm_mask_for_reverse (vectype))
1993 if (dump_enabled_p ())
1994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1995 "negative step and reversing not supported.\n");
1996 return VMAT_ELEMENTWISE;
1999 return VMAT_CONTIGUOUS_REVERSE;
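/* Illustrative example, added for exposition (not in the upstream
   source): a single-copy load from a loop such as

     for (i = n - 1; i >= 0; --i)
       ... = a[i];

   has DR_STEP = -4 for 4-byte ints; if the alignment is supportable and
   perm_mask_for_reverse succeeds, the access is classified
   VMAT_CONTIGUOUS_REVERSE, otherwise it falls back to VMAT_ELEMENTWISE.  */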
2002 /* STMT_INFO is either a masked or unconditional store. Return the value
2003 being stored. */
2005 tree
2006 vect_get_store_rhs (stmt_vec_info stmt_info)
2008 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2010 gcc_assert (gimple_assign_single_p (assign));
2011 return gimple_assign_rhs1 (assign);
2013 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2015 internal_fn ifn = gimple_call_internal_fn (call);
2016 int index = internal_fn_stored_value_index (ifn);
2017 gcc_assert (index >= 0);
2018 return gimple_call_arg (call, index);
2020 gcc_unreachable ();
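/* Illustrative note, added for exposition (not in the upstream source):
   for a plain assignment such as "*p_2 = x_3" the stored value is the
   single RHS x_3; for a masked store call such as
   IFN_MASK_STORE (ptr, align, mask, value) the value operand is located
   via internal_fn_stored_value_index.  */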
2023 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2025 This function returns a vector type which can be composed from NELTS pieces,
2026 whose type is recorded in PTYPE. VTYPE should be a vector type and has the
2027 same vector size as the returned vector. It first checks whether the target
2028 supports construction from a pieces-sized vector mode; if not, it then checks
2029 construction from a pieces-sized scalar mode. It returns NULL_TREE if no
2030 usable composition can be found.
2032 For example, for (vtype=V16QI, nelts=4), we can probably get:
2033 - V16QI with PTYPE V4QI.
2034 - V4SI with PTYPE SI.
2035 - NULL_TREE. */
2037 static tree
2038 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2040 gcc_assert (VECTOR_TYPE_P (vtype));
2041 gcc_assert (known_gt (nelts, 0U));
2043 machine_mode vmode = TYPE_MODE (vtype);
2044 if (!VECTOR_MODE_P (vmode))
2045 return NULL_TREE;
2047 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2048 unsigned int pbsize;
2049 if (constant_multiple_p (vbsize, nelts, &pbsize))
2051 /* First check if vec_init optab supports construction from
2052 vector pieces directly. */
2053 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2054 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2055 machine_mode rmode;
2056 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2057 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2058 != CODE_FOR_nothing))
2060 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2061 return vtype;
2064 /* Otherwise check whether an integer type of the same piece size exists
2065 and whether the vec_init optab supports construction from it directly. */
2066 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2067 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2068 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2069 != CODE_FOR_nothing))
2071 *ptype = build_nonstandard_integer_type (pbsize, 1);
2072 return build_vector_type (*ptype, nelts);
2076 return NULL_TREE;
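/* Usage note, added for exposition (not in the upstream source): within
   this file get_group_load_store_type calls this function with NELTS = 2
   to check whether a group with a trailing gap can be composed from a
   half-sized piece, avoiding the need to peel an epilogue for the gap.  */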
2079 /* A subroutine of get_load_store_type, with a subset of the same
2080 arguments. Handle the case where STMT_INFO is part of a grouped load
2081 or store.
2083 For stores, the statements in the group are all consecutive
2084 and there is no gap at the end. For loads, the statements in the
2085 group might not be consecutive; there can be gaps between statements
2086 as well as at the end. */
2088 static bool
2089 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2090 tree vectype, slp_tree slp_node,
2091 bool masked_p, vec_load_store_type vls_type,
2092 vect_memory_access_type *memory_access_type,
2093 dr_alignment_support *alignment_support_scheme,
2094 gather_scatter_info *gs_info)
2096 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2097 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2098 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2099 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2100 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2101 bool single_element_p = (stmt_info == first_stmt_info
2102 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2103 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2104 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2106 /* True if the vectorized statements would access beyond the last
2107 statement in the group. */
2108 bool overrun_p = false;
2110 /* True if we can cope with such overrun by peeling for gaps, so that
2111 there is at least one final scalar iteration after the vector loop. */
2112 bool can_overrun_p = (!masked_p
2113 && vls_type == VLS_LOAD
2114 && loop_vinfo
2115 && !loop->inner);
2117 /* There can only be a gap at the end of the group if the stride is
2118 known at compile time. */
2119 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2121 /* Stores can't yet have gaps. */
2122 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2124 if (slp_node)
2126 /* For SLP vectorization we directly vectorize a subchain
2127 without permutation. */
2128 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2129 first_dr_info
2130 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2131 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2133 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2134 separated by the stride, until we have a complete vector.
2135 Fall back to scalar accesses if that isn't possible. */
2136 if (multiple_p (nunits, group_size))
2137 *memory_access_type = VMAT_STRIDED_SLP;
2138 else
2139 *memory_access_type = VMAT_ELEMENTWISE;
2141 else
2143 overrun_p = loop_vinfo && gap != 0;
2144 if (overrun_p && vls_type != VLS_LOAD)
2146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2147 "Grouped store with gaps requires"
2148 " non-consecutive accesses\n");
2149 return false;
2151 /* An overrun is fine if the trailing elements are smaller
2152 than the alignment boundary B. Every vector access will
2153 be a multiple of B and so we are guaranteed to access a
2154 non-gap element in the same B-sized block. */
2155 if (overrun_p
2156 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2157 / vect_get_scalar_dr_size (first_dr_info)))
2158 overrun_p = false;
2160 /* If the gap splits the vector in half and the target
2161 can do half-vector operations, avoid the epilogue peeling
2162 by loading only half of the vector. Usually the
2163 construction with an upper zero half will be elided. */
2164 dr_alignment_support alignment_support_scheme;
2165 tree half_vtype;
2166 if (overrun_p
2167 && !masked_p
2168 && (((alignment_support_scheme
2169 = vect_supportable_dr_alignment (vinfo,
2170 first_dr_info, false)))
2171 == dr_aligned
2172 || alignment_support_scheme == dr_unaligned_supported)
2173 && known_eq (nunits, (group_size - gap) * 2)
2174 && known_eq (nunits, group_size)
2175 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2176 != NULL_TREE))
2177 overrun_p = false;
2179 if (overrun_p && !can_overrun_p)
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "Peeling for outer loop is not supported\n");
2184 return false;
2186 int cmp = compare_step_with_zero (vinfo, stmt_info);
2187 if (cmp < 0)
2189 if (single_element_p)
2190 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2191 only correct for single element "interleaving" SLP. */
2192 *memory_access_type = get_negative_load_store_type
2193 (vinfo, stmt_info, vectype, vls_type, 1);
2194 else
2196 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2197 separated by the stride, until we have a complete vector.
2198 Fall back to scalar accesses if that isn't possible. */
2199 if (multiple_p (nunits, group_size))
2200 *memory_access_type = VMAT_STRIDED_SLP;
2201 else
2202 *memory_access_type = VMAT_ELEMENTWISE;
2205 else
2207 gcc_assert (!loop_vinfo || cmp > 0);
2208 *memory_access_type = VMAT_CONTIGUOUS;
2212 else
2214 /* We can always handle this case using elementwise accesses,
2215 but see if something more efficient is available. */
2216 *memory_access_type = VMAT_ELEMENTWISE;
2218 /* If there is a gap at the end of the group then these optimizations
2219 would access excess elements in the last iteration. */
2220 bool would_overrun_p = (gap != 0);
2221 /* An overrun is fine if the trailing elements are smaller than the
2222 alignment boundary B. Every vector access will be a multiple of B
2223 and so we are guaranteed to access a non-gap element in the
2224 same B-sized block. */
2225 if (would_overrun_p
2226 && !masked_p
2227 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2228 / vect_get_scalar_dr_size (first_dr_info)))
2229 would_overrun_p = false;
2231 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2232 && (can_overrun_p || !would_overrun_p)
2233 && compare_step_with_zero (vinfo, stmt_info) > 0)
2235 /* First cope with the degenerate case of a single-element
2236 vector. */
2237 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2240 /* Otherwise try using LOAD/STORE_LANES. */
2241 else if (vls_type == VLS_LOAD
2242 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2243 : vect_store_lanes_supported (vectype, group_size,
2244 masked_p))
2246 *memory_access_type = VMAT_LOAD_STORE_LANES;
2247 overrun_p = would_overrun_p;
2250 /* If that fails, try using permuting loads. */
2251 else if (vls_type == VLS_LOAD
2252 ? vect_grouped_load_supported (vectype, single_element_p,
2253 group_size)
2254 : vect_grouped_store_supported (vectype, group_size))
2256 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2257 overrun_p = would_overrun_p;
2261 /* As a last resort, try using a gather load or scatter store.
2263 ??? Although the code can handle all group sizes correctly,
2264 it probably isn't a win to use separate strided accesses based
2265 on nearby locations. Or, even if it's a win over scalar code,
2266 it might not be a win over vectorizing at a lower VF, if that
2267 allows us to use contiguous accesses. */
2268 if (*memory_access_type == VMAT_ELEMENTWISE
2269 && single_element_p
2270 && loop_vinfo
2271 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2272 masked_p, gs_info))
2273 *memory_access_type = VMAT_GATHER_SCATTER;
2276 if (*memory_access_type == VMAT_GATHER_SCATTER
2277 || *memory_access_type == VMAT_ELEMENTWISE)
2278 *alignment_support_scheme = dr_unaligned_supported;
2279 else
2280 *alignment_support_scheme
2281 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
2283 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2285 /* STMT is the leader of the group. Check the operands of all the
2286 stmts of the group. */
2287 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2288 while (next_stmt_info)
2290 tree op = vect_get_store_rhs (next_stmt_info);
2291 enum vect_def_type dt;
2292 if (!vect_is_simple_use (op, vinfo, &dt))
2294 if (dump_enabled_p ())
2295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2296 "use not simple.\n");
2297 return false;
2299 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2303 if (overrun_p)
2305 gcc_assert (can_overrun_p);
2306 if (dump_enabled_p ())
2307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308 "Data access with gaps requires scalar "
2309 "epilogue loop\n");
2310 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2313 return true;
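/* Summary note, added for exposition (not in the upstream source): for a
   contiguous non-SLP group the code above, after the degenerate
   single-element-vector case, prefers LOAD/STORE_LANES, then a contiguous
   access with permutes; accesses that end up elementwise and form
   single-element groups may, as a last resort, become strided
   gathers/scatters.  Overruns past the end of the group are tolerated
   only when peeling for gaps is possible.  */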
2316 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2317 if there is a memory access type that the vectorized form can use,
2318 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2319 or scatters, fill in GS_INFO accordingly. In addition
2320 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2321 the target does not support the alignment scheme.
2323 SLP says whether we're performing SLP rather than loop vectorization.
2324 MASKED_P is true if the statement is conditional on a vectorized mask.
2325 VECTYPE is the vector type that the vectorized statements will use.
2326 NCOPIES is the number of vector statements that will be needed. */
2328 static bool
2329 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2330 tree vectype, slp_tree slp_node,
2331 bool masked_p, vec_load_store_type vls_type,
2332 unsigned int ncopies,
2333 vect_memory_access_type *memory_access_type,
2334 dr_alignment_support *alignment_support_scheme,
2335 gather_scatter_info *gs_info)
2337 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2338 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2339 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2341 *memory_access_type = VMAT_GATHER_SCATTER;
2342 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2343 gcc_unreachable ();
2344 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2345 &gs_info->offset_dt,
2346 &gs_info->offset_vectype))
2348 if (dump_enabled_p ())
2349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2350 "%s index use not simple.\n",
2351 vls_type == VLS_LOAD ? "gather" : "scatter");
2352 return false;
2354 /* Gather-scatter accesses perform only component accesses, alignment
2355 is irrelevant for them. */
2356 *alignment_support_scheme = dr_unaligned_supported;
2358 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2360 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2361 masked_p,
2362 vls_type, memory_access_type,
2363 alignment_support_scheme, gs_info))
2364 return false;
2366 else if (STMT_VINFO_STRIDED_P (stmt_info))
2368 gcc_assert (!slp_node);
2369 if (loop_vinfo
2370 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2371 masked_p, gs_info))
2372 *memory_access_type = VMAT_GATHER_SCATTER;
2373 else
2374 *memory_access_type = VMAT_ELEMENTWISE;
2375 /* Alignment is irrelevant here. */
2376 *alignment_support_scheme = dr_unaligned_supported;
2378 else
2380 int cmp = compare_step_with_zero (vinfo, stmt_info);
2381 if (cmp < 0)
2382 *memory_access_type = get_negative_load_store_type
2383 (vinfo, stmt_info, vectype, vls_type, ncopies);
2384 else if (cmp == 0)
2386 gcc_assert (vls_type == VLS_LOAD);
2387 *memory_access_type = VMAT_INVARIANT;
2389 else
2390 *memory_access_type = VMAT_CONTIGUOUS;
2391 *alignment_support_scheme
2392 = vect_supportable_dr_alignment (vinfo,
2393 STMT_VINFO_DR_INFO (stmt_info), false);
2396 if ((*memory_access_type == VMAT_ELEMENTWISE
2397 || *memory_access_type == VMAT_STRIDED_SLP)
2398 && !nunits.is_constant ())
2400 if (dump_enabled_p ())
2401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2402 "Not using elementwise accesses due to variable "
2403 "vectorization factor.\n");
2404 return false;
2407 if (*alignment_support_scheme == dr_unaligned_unsupported)
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2411 "unsupported unaligned access\n");
2412 return false;
2415 /* FIXME: At the moment the cost model seems to underestimate the
2416 cost of using elementwise accesses. This check preserves the
2417 traditional behavior until that can be fixed. */
2418 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2419 if (!first_stmt_info)
2420 first_stmt_info = stmt_info;
2421 if (*memory_access_type == VMAT_ELEMENTWISE
2422 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2423 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2424 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2425 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "not falling back to elementwise accesses\n");
2430 return false;
2432 return true;
2435 /* Return true if boolean argument MASK is suitable for vectorizing
2436 conditional operation STMT_INFO. When returning true, store the type
2437 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2438 in *MASK_VECTYPE_OUT. */
2440 static bool
2441 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2442 vect_def_type *mask_dt_out,
2443 tree *mask_vectype_out)
2445 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2447 if (dump_enabled_p ())
2448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2449 "mask argument is not a boolean.\n");
2450 return false;
2453 if (TREE_CODE (mask) != SSA_NAME)
2455 if (dump_enabled_p ())
2456 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2457 "mask argument is not an SSA name.\n");
2458 return false;
2461 enum vect_def_type mask_dt;
2462 tree mask_vectype;
2463 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2465 if (dump_enabled_p ())
2466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2467 "mask use not simple.\n");
2468 return false;
2471 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2472 if (!mask_vectype)
2473 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2475 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2477 if (dump_enabled_p ())
2478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2479 "could not find an appropriate vector mask type.\n");
2480 return false;
2483 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2484 TYPE_VECTOR_SUBPARTS (vectype)))
2486 if (dump_enabled_p ())
2487 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2488 "vector mask type %T"
2489 " does not match vector data type %T.\n",
2490 mask_vectype, vectype);
2492 return false;
2495 *mask_dt_out = mask_dt;
2496 *mask_vectype_out = mask_vectype;
2497 return true;
2500 /* Return true if stored value RHS is suitable for vectorizing store
2501 statement STMT_INFO. When returning true, store the type of the
2502 definition in *RHS_DT_OUT, the type of the vectorized store value in
2503 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2505 static bool
2506 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2507 slp_tree slp_node, tree rhs,
2508 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2509 vec_load_store_type *vls_type_out)
2511 /* In case this is a store from a constant, make sure
2512 native_encode_expr can handle it. */
2513 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2515 if (dump_enabled_p ())
2516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2517 "cannot encode constant as a byte sequence.\n");
2518 return false;
2521 enum vect_def_type rhs_dt;
2522 tree rhs_vectype;
2523 slp_tree slp_op;
2524 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
2525 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2527 if (dump_enabled_p ())
2528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2529 "use not simple.\n");
2530 return false;
2533 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2534 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2536 if (dump_enabled_p ())
2537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2538 "incompatible vector types.\n");
2539 return false;
2542 *rhs_dt_out = rhs_dt;
2543 *rhs_vectype_out = rhs_vectype;
2544 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2545 *vls_type_out = VLS_STORE_INVARIANT;
2546 else
2547 *vls_type_out = VLS_STORE;
2548 return true;
2551 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2552 Note that we support masks with floating-point type, in which case the
2553 floats are interpreted as a bitmask. */
2555 static tree
2556 vect_build_all_ones_mask (vec_info *vinfo,
2557 stmt_vec_info stmt_info, tree masktype)
2559 if (TREE_CODE (masktype) == INTEGER_TYPE)
2560 return build_int_cst (masktype, -1);
2561 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2563 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2564 mask = build_vector_from_val (masktype, mask);
2565 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2567 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2569 REAL_VALUE_TYPE r;
2570 long tmp[6];
2571 for (int j = 0; j < 6; ++j)
2572 tmp[j] = -1;
2573 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2574 tree mask = build_real (TREE_TYPE (masktype), r);
2575 mask = build_vector_from_val (masktype, mask);
2576 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2578 gcc_unreachable ();
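/* Illustrative note, added for exposition (not in the upstream source):
   for a floating-point MASKTYPE such as V4SF the all-ones element is
   obtained by feeding an all-ones bit pattern to real_from_target, i.e.
   each lane is the float whose representation is ~0, matching the
   "floats as bitmask" convention mentioned above.  */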
2581 /* Build an all-zero merge value of type VECTYPE while vectorizing
2582 STMT_INFO as a gather load. */
2584 static tree
2585 vect_build_zero_merge_argument (vec_info *vinfo,
2586 stmt_vec_info stmt_info, tree vectype)
2588 tree merge;
2589 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2590 merge = build_int_cst (TREE_TYPE (vectype), 0);
2591 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2593 REAL_VALUE_TYPE r;
2594 long tmp[6];
2595 for (int j = 0; j < 6; ++j)
2596 tmp[j] = 0;
2597 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2598 merge = build_real (TREE_TYPE (vectype), r);
2600 else
2601 gcc_unreachable ();
2602 merge = build_vector_from_val (vectype, merge);
2603 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2606 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2607 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2608 the gather load operation. If the load is conditional, MASK is the
2609 unvectorized condition and MASK_DT is its definition type, otherwise
2610 MASK is null. */
2612 static void
2613 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2614 gimple_stmt_iterator *gsi,
2615 gimple **vec_stmt,
2616 gather_scatter_info *gs_info,
2617 tree mask)
2619 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2620 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2621 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2622 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2623 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2624 edge pe = loop_preheader_edge (loop);
2625 enum { NARROW, NONE, WIDEN } modifier;
2626 poly_uint64 gather_off_nunits
2627 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2629 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2630 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2631 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2632 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2633 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2634 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2635 tree scaletype = TREE_VALUE (arglist);
2636 tree real_masktype = masktype;
2637 gcc_checking_assert (types_compatible_p (srctype, rettype)
2638 && (!mask
2639 || TREE_CODE (masktype) == INTEGER_TYPE
2640 || types_compatible_p (srctype, masktype)));
2641 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2642 masktype = truth_type_for (srctype);
2644 tree mask_halftype = masktype;
2645 tree perm_mask = NULL_TREE;
2646 tree mask_perm_mask = NULL_TREE;
2647 if (known_eq (nunits, gather_off_nunits))
2648 modifier = NONE;
2649 else if (known_eq (nunits * 2, gather_off_nunits))
2651 modifier = WIDEN;
2653 /* Currently widening gathers and scatters are only supported for
2654 fixed-length vectors. */
2655 int count = gather_off_nunits.to_constant ();
2656 vec_perm_builder sel (count, count, 1);
2657 for (int i = 0; i < count; ++i)
2658 sel.quick_push (i | (count / 2));
2660 vec_perm_indices indices (sel, 1, count);
2661 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2662 indices);
2664 else if (known_eq (nunits, gather_off_nunits * 2))
2666 modifier = NARROW;
2668 /* Currently narrowing gathers and scatters are only supported for
2669 fixed-length vectors. */
2670 int count = nunits.to_constant ();
2671 vec_perm_builder sel (count, count, 1);
2672 sel.quick_grow (count);
2673 for (int i = 0; i < count; ++i)
2674 sel[i] = i < count / 2 ? i : i + count / 2;
2675 vec_perm_indices indices (sel, 2, count);
2676 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2678 ncopies *= 2;
2680 if (mask && masktype == real_masktype)
2682 for (int i = 0; i < count; ++i)
2683 sel[i] = i | (count / 2);
2684 indices.new_vector (sel, 2, count);
2685 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2687 else if (mask)
2688 mask_halftype = truth_type_for (gs_info->offset_vectype);
2690 else
2691 gcc_unreachable ();
2693 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2694 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2696 tree ptr = fold_convert (ptrtype, gs_info->base);
2697 if (!is_gimple_min_invariant (ptr))
2699 gimple_seq seq;
2700 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2701 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2702 gcc_assert (!new_bb);
2705 tree scale = build_int_cst (scaletype, gs_info->scale);
2707 tree vec_oprnd0 = NULL_TREE;
2708 tree vec_mask = NULL_TREE;
2709 tree src_op = NULL_TREE;
2710 tree mask_op = NULL_TREE;
2711 tree prev_res = NULL_TREE;
2713 if (!mask)
2715 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2716 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2719 auto_vec<tree> vec_oprnds0;
2720 auto_vec<tree> vec_masks;
2721 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2722 modifier == WIDEN ? ncopies / 2 : ncopies,
2723 gs_info->offset, &vec_oprnds0);
2724 if (mask)
2725 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2726 modifier == NARROW ? ncopies / 2 : ncopies,
2727 mask, &vec_masks);
2728 for (int j = 0; j < ncopies; ++j)
2730 tree op, var;
2731 if (modifier == WIDEN && (j & 1))
2732 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2733 perm_mask, stmt_info, gsi);
2734 else
2735 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2737 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2739 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2740 TYPE_VECTOR_SUBPARTS (idxtype)));
2741 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2742 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2743 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2744 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2745 op = var;
2748 if (mask)
2750 if (mask_perm_mask && (j & 1))
2751 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2752 mask_perm_mask, stmt_info, gsi);
2753 else
2755 if (modifier == NARROW)
2757 if ((j & 1) == 0)
2758 vec_mask = vec_masks[j / 2];
2760 else
2761 vec_mask = vec_masks[j];
2763 mask_op = vec_mask;
2764 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2766 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2767 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2768 gcc_assert (known_eq (sub1, sub2));
2769 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2770 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2771 gassign *new_stmt
2772 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2773 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2774 mask_op = var;
2777 if (modifier == NARROW && masktype != real_masktype)
2779 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2780 gassign *new_stmt
2781 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2782 : VEC_UNPACK_LO_EXPR,
2783 mask_op);
2784 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2785 mask_op = var;
2787 src_op = mask_op;
2790 tree mask_arg = mask_op;
2791 if (masktype != real_masktype)
2793 tree utype, optype = TREE_TYPE (mask_op);
2794 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2795 utype = real_masktype;
2796 else
2797 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2798 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2799 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2800 gassign *new_stmt
2801 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2802 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2803 mask_arg = var;
2804 if (!useless_type_conversion_p (real_masktype, utype))
2806 gcc_assert (TYPE_PRECISION (utype)
2807 <= TYPE_PRECISION (real_masktype));
2808 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2809 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2810 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2811 mask_arg = var;
2813 src_op = build_zero_cst (srctype);
2815 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2816 mask_arg, scale);
2818 if (!useless_type_conversion_p (vectype, rettype))
2820 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2821 TYPE_VECTOR_SUBPARTS (rettype)));
2822 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2823 gimple_call_set_lhs (new_stmt, op);
2824 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2825 var = make_ssa_name (vec_dest);
2826 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2827 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2828 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2830 else
2832 var = make_ssa_name (vec_dest, new_stmt);
2833 gimple_call_set_lhs (new_stmt, var);
2834 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2837 if (modifier == NARROW)
2839 if ((j & 1) == 0)
2841 prev_res = var;
2842 continue;
2844 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2845 stmt_info, gsi);
2846 new_stmt = SSA_NAME_DEF_STMT (var);
2849 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2851 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
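/* Summary note, added for exposition (not in the upstream source): with
   the NONE modifier one builtin gather call is emitted per copy, with
   VIEW_CONVERT_EXPRs inserted whenever the builtin's index, mask or
   return type differs from the vectorizer's chosen vector type; WIDEN
   additionally permutes the offset vector on odd copies, and NARROW
   doubles NCOPIES and merges each pair of results with a permute.  */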
2854 /* Prepare the base and offset in GS_INFO for vectorization.
2855 Set *DATAREF_PTR to the loop-invariant base address and fill *VEC_OFFSET
2856 with the vectorized offset operands, one for each of the NCOPIES copies
2857 of STMT_INFO. STMT_INFO is the statement described by GS_INFO and LOOP is the
2858 containing loop. */
2860 static void
2861 vect_get_gather_scatter_ops (vec_info *vinfo,
2862 class loop *loop, stmt_vec_info stmt_info,
2863 gather_scatter_info *gs_info,
2864 tree *dataref_ptr, vec<tree> *vec_offset,
2865 unsigned ncopies)
2867 gimple_seq stmts = NULL;
2868 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2869 if (stmts != NULL)
2871 basic_block new_bb;
2872 edge pe = loop_preheader_edge (loop);
2873 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2874 gcc_assert (!new_bb);
2876 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
2877 vec_offset, gs_info->offset_vectype);
2880 /* Prepare to implement a grouped or strided load or store using
2881 the gather load or scatter store operation described by GS_INFO.
2882 STMT_INFO is the load or store statement.
2884 Set *DATAREF_BUMP to the amount that should be added to the base
2885 address after each copy of the vectorized statement. Set *VEC_OFFSET
2886 to an invariant offset vector in which element I has the value
2887 I * DR_STEP / SCALE. */
2889 static void
2890 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2891 loop_vec_info loop_vinfo,
2892 gather_scatter_info *gs_info,
2893 tree *dataref_bump, tree *vec_offset)
2895 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2896 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2898 tree bump = size_binop (MULT_EXPR,
2899 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2900 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2901 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2903 /* The offset given in GS_INFO can have pointer type, so use the element
2904 type of the vector instead. */
2905 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2907 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2908 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2909 ssize_int (gs_info->scale));
2910 step = fold_convert (offset_type, step);
2912 /* Create {0, X, X*2, X*3, ...}. */
2913 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2914 build_zero_cst (offset_type), step);
2915 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
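/* Worked example, added for exposition (not in the upstream source): for
   a strided access with DR_STEP = 32 bytes and SCALE = 4 we get
   X = 32 / 4 = 8, so *VEC_OFFSET is the series { 0, 8, 16, 24, ... } and
   *DATAREF_BUMP is 32 * TYPE_VECTOR_SUBPARTS (vectype) bytes per copy.  */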
2918 /* Return the amount that should be added to a vector pointer to move
2919 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2920 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2921 vectorization. */
2923 static tree
2924 vect_get_data_ptr_increment (vec_info *vinfo,
2925 dr_vec_info *dr_info, tree aggr_type,
2926 vect_memory_access_type memory_access_type)
2928 if (memory_access_type == VMAT_INVARIANT)
2929 return size_zero_node;
2931 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2932 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2933 if (tree_int_cst_sgn (step) == -1)
2934 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2935 return iv_step;
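/* Illustrative example, added for exposition (not in the upstream
   source): with AGGR_TYPE = V4SI the increment is +16 bytes for a
   forward access, -16 bytes when the data reference's step is negative,
   and 0 for VMAT_INVARIANT.  */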
2938 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2940 static bool
2941 vectorizable_bswap (vec_info *vinfo,
2942 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2943 gimple **vec_stmt, slp_tree slp_node,
2944 slp_tree *slp_op,
2945 tree vectype_in, stmt_vector_for_cost *cost_vec)
2947 tree op, vectype;
2948 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2949 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2950 unsigned ncopies;
2952 op = gimple_call_arg (stmt, 0);
2953 vectype = STMT_VINFO_VECTYPE (stmt_info);
2954 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2956 /* Multiple types in SLP are handled by creating the appropriate number of
2957 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2958 case of SLP. */
2959 if (slp_node)
2960 ncopies = 1;
2961 else
2962 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2964 gcc_assert (ncopies >= 1);
2966 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2967 if (! char_vectype)
2968 return false;
2970 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2971 unsigned word_bytes;
2972 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2973 return false;
2975 /* The encoding uses one stepped pattern for each byte in the word. */
2976 vec_perm_builder elts (num_bytes, word_bytes, 3);
2977 for (unsigned i = 0; i < 3; ++i)
2978 for (unsigned j = 0; j < word_bytes; ++j)
2979 elts.quick_push ((i + 1) * word_bytes - j - 1);
2981 vec_perm_indices indices (elts, 1, num_bytes);
2982 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2983 return false;
2985 if (! vec_stmt)
2987 if (slp_node
2988 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
2990 if (dump_enabled_p ())
2991 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2992 "incompatible vector types for invariants\n");
2993 return false;
2996 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2997 DUMP_VECT_SCOPE ("vectorizable_bswap");
2998 if (! slp_node)
3000 record_stmt_cost (cost_vec,
3001 1, vector_stmt, stmt_info, 0, vect_prologue);
3002 record_stmt_cost (cost_vec,
3003 ncopies, vec_perm, stmt_info, 0, vect_body);
3005 return true;
3008 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3010 /* Transform. */
3011 vec<tree> vec_oprnds = vNULL;
3012 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3013 op, &vec_oprnds);
3014 /* Arguments are ready. Create the new vector stmt. */
3015 unsigned i;
3016 tree vop;
3017 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3019 gimple *new_stmt;
3020 tree tem = make_ssa_name (char_vectype);
3021 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3022 char_vectype, vop));
3023 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3024 tree tem2 = make_ssa_name (char_vectype);
3025 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3026 tem, tem, bswap_vconst);
3027 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3028 tem = make_ssa_name (vectype);
3029 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3030 vectype, tem2));
3031 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3032 if (slp_node)
3033 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3034 else
3035 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3038 if (!slp_node)
3039 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3041 vec_oprnds.release ();
3042 return true;
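/* Worked example, added for exposition (not in the upstream source):
   vectorizing __builtin_bswap32 with VECTYPE = V4SI views each operand
   as V16QI and applies the byte permutation

     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }

   built above (WORD_BYTES = 4), then view-converts the result back to
   V4SI.  */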
3045 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3046 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3047 in a single step. On success, store the binary pack code in
3048 *CONVERT_CODE. */
3050 static bool
3051 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3052 tree_code *convert_code)
3054 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3055 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3056 return false;
3058 tree_code code;
3059 int multi_step_cvt = 0;
3060 auto_vec <tree, 8> interm_types;
3061 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3062 &code, &multi_step_cvt, &interm_types)
3063 || multi_step_cvt)
3064 return false;
3066 *convert_code = code;
3067 return true;
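/* Illustrative example, added for exposition (not in the upstream
   source): with VECTYPE_IN = V2DI and VECTYPE_OUT = V4SI the narrowing
   is a single step and *CONVERT_CODE is the target's pack operation
   (typically VEC_PACK_TRUNC_EXPR), which vectorizable_call below uses
   to combine each pair of half-width results.  */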
3070 /* Function vectorizable_call.
3072 Check if STMT_INFO performs a function call that can be vectorized.
3073 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3074 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3075 Return true if STMT_INFO is vectorizable in this way. */
3077 static bool
3078 vectorizable_call (vec_info *vinfo,
3079 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3080 gimple **vec_stmt, slp_tree slp_node,
3081 stmt_vector_for_cost *cost_vec)
3083 gcall *stmt;
3084 tree vec_dest;
3085 tree scalar_dest;
3086 tree op;
3087 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3088 tree vectype_out, vectype_in;
3089 poly_uint64 nunits_in;
3090 poly_uint64 nunits_out;
3091 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3092 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3093 tree fndecl, new_temp, rhs_type;
3094 enum vect_def_type dt[4]
3095 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3096 vect_unknown_def_type };
3097 tree vectypes[ARRAY_SIZE (dt)] = {};
3098 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3099 int ndts = ARRAY_SIZE (dt);
3100 int ncopies, j;
3101 auto_vec<tree, 8> vargs;
3102 auto_vec<tree, 8> orig_vargs;
3103 enum { NARROW, NONE, WIDEN } modifier;
3104 size_t i, nargs;
3105 tree lhs;
3107 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3108 return false;
3110 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3111 && ! vec_stmt)
3112 return false;
3114 /* Is STMT_INFO a vectorizable call? */
3115 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3116 if (!stmt)
3117 return false;
3119 if (gimple_call_internal_p (stmt)
3120 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3121 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3122 /* Handled by vectorizable_load and vectorizable_store. */
3123 return false;
3125 if (gimple_call_lhs (stmt) == NULL_TREE
3126 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3127 return false;
3129 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3131 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3133 /* Process function arguments. */
3134 rhs_type = NULL_TREE;
3135 vectype_in = NULL_TREE;
3136 nargs = gimple_call_num_args (stmt);
3138 /* Bail out if the function has more than four arguments; we do not have
3139 interesting builtin functions to vectorize with more than two arguments
3140 except for fma. Having no arguments is not good either. */
3141 if (nargs == 0 || nargs > 4)
3142 return false;
3144 /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. */
3145 combined_fn cfn = gimple_call_combined_fn (stmt);
3146 if (cfn == CFN_GOMP_SIMD_LANE)
3148 nargs = 0;
3149 rhs_type = unsigned_type_node;
3152 int mask_opno = -1;
3153 if (internal_fn_p (cfn))
3154 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3156 for (i = 0; i < nargs; i++)
3158 if ((int) i == mask_opno)
3160 op = gimple_call_arg (stmt, i);
3161 if (!vect_check_scalar_mask (vinfo,
3162 stmt_info, op, &dt[i], &vectypes[i]))
3163 return false;
3164 continue;
3167 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3168 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3170 if (dump_enabled_p ())
3171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3172 "use not simple.\n");
3173 return false;
3176 /* We can only handle calls with arguments of the same type. */
3177 if (rhs_type
3178 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3180 if (dump_enabled_p ())
3181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3182 "argument types differ.\n");
3183 return false;
3185 if (!rhs_type)
3186 rhs_type = TREE_TYPE (op);
3188 if (!vectype_in)
3189 vectype_in = vectypes[i];
3190 else if (vectypes[i]
3191 && !types_compatible_p (vectypes[i], vectype_in))
3193 if (dump_enabled_p ())
3194 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3195 "argument vector types differ.\n");
3196 return false;
3199 /* If all arguments are external or constant defs, infer the vector type
3200 from the scalar type. */
3201 if (!vectype_in)
3202 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3203 if (vec_stmt)
3204 gcc_assert (vectype_in);
3205 if (!vectype_in)
3207 if (dump_enabled_p ())
3208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3209 "no vectype for scalar type %T\n", rhs_type);
3211 return false;
3213 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3214 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3215 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3216 by a pack of the two vectors into an SI vector. We would need
3217 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3218 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3220 if (dump_enabled_p ())
3221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3222 "mismatched vector sizes %T and %T\n",
3223 vectype_in, vectype_out);
3224 return false;
3227 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3228 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3230 if (dump_enabled_p ())
3231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3232 "mixed mask and nonmask vector types\n");
3233 return false;
3236 /* FORNOW */
3237 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3238 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3239 if (known_eq (nunits_in * 2, nunits_out))
3240 modifier = NARROW;
3241 else if (known_eq (nunits_out, nunits_in))
3242 modifier = NONE;
3243 else if (known_eq (nunits_out * 2, nunits_in))
3244 modifier = WIDEN;
3245 else
3246 return false;
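/* Illustrative note, added for exposition (not in the upstream source):
   a call whose input and output vectors have the same lane count
   (e.g. V4SF -> V4SF) gets modifier NONE; a DI -> SI style call whose
   output vector has twice as many lanes as its input (e.g. V2DI in,
   V4SI out) gets NARROW; the opposite ratio gets WIDEN.  */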
3248 /* We only handle functions that do not read or clobber memory. */
3249 if (gimple_vuse (stmt))
3251 if (dump_enabled_p ())
3252 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3253 "function reads from or writes to memory.\n");
3254 return false;
3257 /* For now, we only vectorize functions if a target specific builtin
3258 is available. TODO -- in some cases, it might be profitable to
3259 insert the calls for pieces of the vector, in order to be able
3260 to vectorize other operations in the loop. */
3261 fndecl = NULL_TREE;
3262 internal_fn ifn = IFN_LAST;
3263 tree callee = gimple_call_fndecl (stmt);
3265 /* First try using an internal function. */
3266 tree_code convert_code = ERROR_MARK;
3267 if (cfn != CFN_LAST
3268 && (modifier == NONE
3269 || (modifier == NARROW
3270 && simple_integer_narrowing (vectype_out, vectype_in,
3271 &convert_code))))
3272 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3273 vectype_in);
3275 /* If that fails, try asking for a target-specific built-in function. */
3276 if (ifn == IFN_LAST)
3278 if (cfn != CFN_LAST)
3279 fndecl = targetm.vectorize.builtin_vectorized_function
3280 (cfn, vectype_out, vectype_in);
3281 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3282 fndecl = targetm.vectorize.builtin_md_vectorized_function
3283 (callee, vectype_out, vectype_in);
3286 if (ifn == IFN_LAST && !fndecl)
3288 if (cfn == CFN_GOMP_SIMD_LANE
3289 && !slp_node
3290 && loop_vinfo
3291 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3292 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3293 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3294 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3296 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3297 { 0, 1, 2, ... vf - 1 } vector. */
3298 gcc_assert (nargs == 0);
3300 else if (modifier == NONE
3301 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3302 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3303 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3304 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3305 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3306 slp_op, vectype_in, cost_vec);
3307 else
3309 if (dump_enabled_p ())
3310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3311 "function is not vectorizable.\n");
3312 return false;
3316 if (slp_node)
3317 ncopies = 1;
3318 else if (modifier == NARROW && ifn == IFN_LAST)
3319 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3320 else
3321 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3323 /* Sanity check: make sure that at least one copy of the vectorized stmt
3324 needs to be generated. */
3325 gcc_assert (ncopies >= 1);
3327 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3328 if (!vec_stmt) /* transformation not required. */
3330 if (slp_node)
3331 for (i = 0; i < nargs; ++i)
3332 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3334 if (dump_enabled_p ())
3335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3336 "incompatible vector types for invariants\n");
3337 return false;
3339 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3340 DUMP_VECT_SCOPE ("vectorizable_call");
3341 vect_model_simple_cost (vinfo, stmt_info,
3342 ncopies, dt, ndts, slp_node, cost_vec);
3343 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3344 record_stmt_cost (cost_vec, ncopies / 2,
3345 vec_promote_demote, stmt_info, 0, vect_body);
3347 if (loop_vinfo && mask_opno >= 0)
3349 unsigned int nvectors = (slp_node
3350 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3351 : ncopies);
3352 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3353 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3354 vectype_out, scalar_mask);
3356 return true;
3359 /* Transform. */
3361 if (dump_enabled_p ())
3362 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3364 /* Handle def. */
3365 scalar_dest = gimple_call_lhs (stmt);
3366 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3368 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3370 if (modifier == NONE || ifn != IFN_LAST)
3372 tree prev_res = NULL_TREE;
3373 vargs.safe_grow (nargs, true);
3374 orig_vargs.safe_grow (nargs, true);
3375 auto_vec<vec<tree> > vec_defs (nargs);
3376 for (j = 0; j < ncopies; ++j)
3378 /* Build argument list for the vectorized call. */
3379 if (slp_node)
3381 vec<tree> vec_oprnds0;
3383 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3384 vec_oprnds0 = vec_defs[0];
3386 /* Arguments are ready. Create the new vector stmt. */
3387 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3389 size_t k;
3390 for (k = 0; k < nargs; k++)
3392 vec<tree> vec_oprndsk = vec_defs[k];
3393 vargs[k] = vec_oprndsk[i];
3395 gimple *new_stmt;
3396 if (modifier == NARROW)
3398 /* We don't define any narrowing conditional functions
3399 at present. */
3400 gcc_assert (mask_opno < 0);
3401 tree half_res = make_ssa_name (vectype_in);
3402 gcall *call
3403 = gimple_build_call_internal_vec (ifn, vargs);
3404 gimple_call_set_lhs (call, half_res);
3405 gimple_call_set_nothrow (call, true);
3406 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3407 if ((i & 1) == 0)
3409 prev_res = half_res;
3410 continue;
3412 new_temp = make_ssa_name (vec_dest);
3413 new_stmt = gimple_build_assign (new_temp, convert_code,
3414 prev_res, half_res);
3415 vect_finish_stmt_generation (vinfo, stmt_info,
3416 new_stmt, gsi);
3418 else
3420 if (mask_opno >= 0 && masked_loop_p)
3422 unsigned int vec_num = vec_oprnds0.length ();
3423 /* Always true for SLP. */
3424 gcc_assert (ncopies == 1);
3425 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3426 vectype_out, i);
3427 vargs[mask_opno] = prepare_load_store_mask
3428 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3431 gcall *call;
3432 if (ifn != IFN_LAST)
3433 call = gimple_build_call_internal_vec (ifn, vargs);
3434 else
3435 call = gimple_build_call_vec (fndecl, vargs);
3436 new_temp = make_ssa_name (vec_dest, call);
3437 gimple_call_set_lhs (call, new_temp);
3438 gimple_call_set_nothrow (call, true);
3439 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3440 new_stmt = call;
3442 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3444 continue;
3447 for (i = 0; i < nargs; i++)
3449 op = gimple_call_arg (stmt, i);
3450 if (j == 0)
3452 vec_defs.quick_push (vNULL);
3453 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3454 op, &vec_defs[i]);
3456 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3459 if (mask_opno >= 0 && masked_loop_p)
3461 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3462 vectype_out, j);
3463 vargs[mask_opno]
3464 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3465 vargs[mask_opno], gsi);
3468 gimple *new_stmt;
3469 if (cfn == CFN_GOMP_SIMD_LANE)
3471 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3472 tree new_var
3473 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3474 gimple *init_stmt = gimple_build_assign (new_var, cst);
3475 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3476 new_temp = make_ssa_name (vec_dest);
3477 new_stmt = gimple_build_assign (new_temp, new_var);
3478 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3480 else if (modifier == NARROW)
3482 /* We don't define any narrowing conditional functions at
3483 present. */
3484 gcc_assert (mask_opno < 0);
3485 tree half_res = make_ssa_name (vectype_in);
3486 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3487 gimple_call_set_lhs (call, half_res);
3488 gimple_call_set_nothrow (call, true);
3489 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3490 if ((j & 1) == 0)
3492 prev_res = half_res;
3493 continue;
3495 new_temp = make_ssa_name (vec_dest);
3496 new_stmt = gimple_build_assign (new_temp, convert_code,
3497 prev_res, half_res);
3498 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3500 else
3502 gcall *call;
3503 if (ifn != IFN_LAST)
3504 call = gimple_build_call_internal_vec (ifn, vargs);
3505 else
3506 call = gimple_build_call_vec (fndecl, vargs);
3507 new_temp = make_ssa_name (vec_dest, call);
3508 gimple_call_set_lhs (call, new_temp);
3509 gimple_call_set_nothrow (call, true);
3510 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3511 new_stmt = call;
3514 if (j == (modifier == NARROW ? 1 : 0))
3515 *vec_stmt = new_stmt;
3516 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3518 for (i = 0; i < nargs; i++)
3520 vec<tree> vec_oprndsi = vec_defs[i];
3521 vec_oprndsi.release ();
3524 else if (modifier == NARROW)
3526 auto_vec<vec<tree> > vec_defs (nargs);
3527 /* We don't define any narrowing conditional functions at present. */
3528 gcc_assert (mask_opno < 0);
3529 for (j = 0; j < ncopies; ++j)
3531 /* Build argument list for the vectorized call. */
3532 if (j == 0)
3533 vargs.create (nargs * 2);
3534 else
3535 vargs.truncate (0);
3537 if (slp_node)
3539 vec<tree> vec_oprnds0;
3541 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3542 vec_oprnds0 = vec_defs[0];
3544 /* Arguments are ready. Create the new vector stmt. */
3545 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3547 size_t k;
3548 vargs.truncate (0);
3549 for (k = 0; k < nargs; k++)
3551 vec<tree> vec_oprndsk = vec_defs[k];
3552 vargs.quick_push (vec_oprndsk[i]);
3553 vargs.quick_push (vec_oprndsk[i + 1]);
3555 gcall *call;
3556 if (ifn != IFN_LAST)
3557 call = gimple_build_call_internal_vec (ifn, vargs);
3558 else
3559 call = gimple_build_call_vec (fndecl, vargs);
3560 new_temp = make_ssa_name (vec_dest, call);
3561 gimple_call_set_lhs (call, new_temp);
3562 gimple_call_set_nothrow (call, true);
3563 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3564 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3566 continue;
3569 for (i = 0; i < nargs; i++)
3571 op = gimple_call_arg (stmt, i);
3572 if (j == 0)
3574 vec_defs.quick_push (vNULL);
3575 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3576 op, &vec_defs[i], vectypes[i]);
3578 vec_oprnd0 = vec_defs[i][2*j];
3579 vec_oprnd1 = vec_defs[i][2*j+1];
3581 vargs.quick_push (vec_oprnd0);
3582 vargs.quick_push (vec_oprnd1);
3585 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3586 new_temp = make_ssa_name (vec_dest, new_stmt);
3587 gimple_call_set_lhs (new_stmt, new_temp);
3588 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3590 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3593 if (!slp_node)
3594 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3596 for (i = 0; i < nargs; i++)
3598 vec<tree> vec_oprndsi = vec_defs[i];
3599 vec_oprndsi.release ();
3602 else
3603 /* No current target implements this case. */
3604 return false;
3606 vargs.release ();
3608 /* The call in STMT might prevent it from being removed in DCE.
3609 We cannot remove it here, however, because of the way the SSA name
3610 it defines is mapped to the new definition. So just replace the
3611 rhs of the statement with something harmless. */
3613 if (slp_node)
3614 return true;
3616 stmt_info = vect_orig_stmt (stmt_info);
3617 lhs = gimple_get_lhs (stmt_info->stmt);
3619 gassign *new_stmt
3620 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3621 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3623 return true;
3627 struct simd_call_arg_info
3629 tree vectype;
3630 tree op;
3631 HOST_WIDE_INT linear_step;
3632 enum vect_def_type dt;
3633 unsigned int align;
3634 bool simd_lane_linear;
3637 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3638 is linear within simd lane (but not within whole loop), note it in
3639 *ARGINFO. */
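/* As a rough illustration (the SSA names below are invented for the
   example), a chain recognized here looks like

     _1 = GOMP_SIMD_LANE (simduid.0_5);
     _2 = (sizetype) _1;
     _3 = _2 * 8;
     op_4 = &array + _3;

   for which the walk records base = &array and linear_step = 8 in
   *ARGINFO, i.e. OP advances by 8 bytes per simd lane.  */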
3641 static void
3642 vect_simd_lane_linear (tree op, class loop *loop,
3643 struct simd_call_arg_info *arginfo)
3645 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3647 if (!is_gimple_assign (def_stmt)
3648 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3649 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3650 return;
3652 tree base = gimple_assign_rhs1 (def_stmt);
3653 HOST_WIDE_INT linear_step = 0;
3654 tree v = gimple_assign_rhs2 (def_stmt);
3655 while (TREE_CODE (v) == SSA_NAME)
3657 tree t;
3658 def_stmt = SSA_NAME_DEF_STMT (v);
3659 if (is_gimple_assign (def_stmt))
3660 switch (gimple_assign_rhs_code (def_stmt))
3662 case PLUS_EXPR:
3663 t = gimple_assign_rhs2 (def_stmt);
3664 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3665 return;
3666 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3667 v = gimple_assign_rhs1 (def_stmt);
3668 continue;
3669 case MULT_EXPR:
3670 t = gimple_assign_rhs2 (def_stmt);
3671 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3672 return;
3673 linear_step = tree_to_shwi (t);
3674 v = gimple_assign_rhs1 (def_stmt);
3675 continue;
3676 CASE_CONVERT:
3677 t = gimple_assign_rhs1 (def_stmt);
3678 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3679 || (TYPE_PRECISION (TREE_TYPE (v))
3680 < TYPE_PRECISION (TREE_TYPE (t))))
3681 return;
3682 if (!linear_step)
3683 linear_step = 1;
3684 v = t;
3685 continue;
3686 default:
3687 return;
3689 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3690 && loop->simduid
3691 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3692 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3693 == loop->simduid))
3695 if (!linear_step)
3696 linear_step = 1;
3697 arginfo->linear_step = linear_step;
3698 arginfo->op = base;
3699 arginfo->simd_lane_linear = true;
3700 return;
3705 /* Return the number of elements in vector type VECTYPE, which is associated
3706 with a SIMD clone. At present these vectors always have a constant
3707 length. */
3709 static unsigned HOST_WIDE_INT
3710 simd_clone_subparts (tree vectype)
3712 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3715 /* Function vectorizable_simd_clone_call.
3717 Check if STMT_INFO performs a function call that can be vectorized
3718 by calling a simd clone of the function.
3719 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3720 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3721 Return true if STMT_INFO is vectorizable in this way. */
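/* As an example (assuming an x86 target and the usual vector-function
   ABI mangling), for

     #pragma omp declare simd
     float foo (float);

     #pragma omp simd
     for (i = 0; i < n; i++)
       a[i] = foo (b[i]);

   the scalar call is replaced by a call to one of foo's simd clones,
   e.g. a 4-lane unmasked clone conventionally named _ZGVbN4v_foo that
   takes and returns whole vectors.  */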
3723 static bool
3724 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3725 gimple_stmt_iterator *gsi,
3726 gimple **vec_stmt, slp_tree slp_node,
3727 stmt_vector_for_cost *)
3729 tree vec_dest;
3730 tree scalar_dest;
3731 tree op, type;
3732 tree vec_oprnd0 = NULL_TREE;
3733 tree vectype;
3734 unsigned int nunits;
3735 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3736 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3737 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3738 tree fndecl, new_temp;
3739 int ncopies, j;
3740 auto_vec<simd_call_arg_info> arginfo;
3741 vec<tree> vargs = vNULL;
3742 size_t i, nargs;
3743 tree lhs, rtype, ratype;
3744 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3746 /* Is STMT a vectorizable call? */
3747 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3748 if (!stmt)
3749 return false;
3751 fndecl = gimple_call_fndecl (stmt);
3752 if (fndecl == NULL_TREE)
3753 return false;
3755 struct cgraph_node *node = cgraph_node::get (fndecl);
3756 if (node == NULL || node->simd_clones == NULL)
3757 return false;
3759 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3760 return false;
3762 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3763 && ! vec_stmt)
3764 return false;
3766 if (gimple_call_lhs (stmt)
3767 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3768 return false;
3770 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3772 vectype = STMT_VINFO_VECTYPE (stmt_info);
3774 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3775 return false;
3777 /* FORNOW */
3778 if (slp_node)
3779 return false;
3781 /* Process function arguments. */
3782 nargs = gimple_call_num_args (stmt);
3784 /* Bail out if the function has zero arguments. */
3785 if (nargs == 0)
3786 return false;
3788 arginfo.reserve (nargs, true);
3790 for (i = 0; i < nargs; i++)
3792 simd_call_arg_info thisarginfo;
3793 affine_iv iv;
3795 thisarginfo.linear_step = 0;
3796 thisarginfo.align = 0;
3797 thisarginfo.op = NULL_TREE;
3798 thisarginfo.simd_lane_linear = false;
3800 op = gimple_call_arg (stmt, i);
3801 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3802 &thisarginfo.vectype)
3803 || thisarginfo.dt == vect_uninitialized_def)
3805 if (dump_enabled_p ())
3806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3807 "use not simple.\n");
3808 return false;
3811 if (thisarginfo.dt == vect_constant_def
3812 || thisarginfo.dt == vect_external_def)
3813 gcc_assert (thisarginfo.vectype == NULL_TREE);
3814 else
3816 gcc_assert (thisarginfo.vectype != NULL_TREE);
3817 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3819 if (dump_enabled_p ())
3820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3821 "vector mask arguments are not supported\n");
3822 return false;
3826 /* For linear arguments, the analysis phase should have saved
3827 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3828 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3829 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3831 gcc_assert (vec_stmt);
3832 thisarginfo.linear_step
3833 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3834 thisarginfo.op
3835 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3836 thisarginfo.simd_lane_linear
3837 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3838 == boolean_true_node);
3839 /* If the loop has been peeled for alignment, we need to adjust it. */
3840 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3841 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3842 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3844 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3845 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3846 tree opt = TREE_TYPE (thisarginfo.op);
3847 bias = fold_convert (TREE_TYPE (step), bias);
3848 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3849 thisarginfo.op
3850 = fold_build2 (POINTER_TYPE_P (opt)
3851 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3852 thisarginfo.op, bias);
3855 else if (!vec_stmt
3856 && thisarginfo.dt != vect_constant_def
3857 && thisarginfo.dt != vect_external_def
3858 && loop_vinfo
3859 && TREE_CODE (op) == SSA_NAME
3860 && simple_iv (loop, loop_containing_stmt (stmt), op,
3861 &iv, false)
3862 && tree_fits_shwi_p (iv.step))
3864 thisarginfo.linear_step = tree_to_shwi (iv.step);
3865 thisarginfo.op = iv.base;
3867 else if ((thisarginfo.dt == vect_constant_def
3868 || thisarginfo.dt == vect_external_def)
3869 && POINTER_TYPE_P (TREE_TYPE (op)))
3870 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3871 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3872 linear too. */
3873 if (POINTER_TYPE_P (TREE_TYPE (op))
3874 && !thisarginfo.linear_step
3875 && !vec_stmt
3876 && thisarginfo.dt != vect_constant_def
3877 && thisarginfo.dt != vect_external_def
3878 && loop_vinfo
3879 && !slp_node
3880 && TREE_CODE (op) == SSA_NAME)
3881 vect_simd_lane_linear (op, loop, &thisarginfo);
3883 arginfo.quick_push (thisarginfo);
3886 unsigned HOST_WIDE_INT vf;
3887 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3889 if (dump_enabled_p ())
3890 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3891 "not considering SIMD clones; not yet supported"
3892 " for variable-width vectors.\n");
3893 return false;
3896 unsigned int badness = 0;
3897 struct cgraph_node *bestn = NULL;
3898 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3899 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3900 else
3901 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3902 n = n->simdclone->next_clone)
3904 unsigned int this_badness = 0;
3905 if (n->simdclone->simdlen > vf
3906 || n->simdclone->nargs != nargs)
3907 continue;
3908 if (n->simdclone->simdlen < vf)
3909 this_badness += (exact_log2 (vf)
3910 - exact_log2 (n->simdclone->simdlen)) * 1024;
3911 if (n->simdclone->inbranch)
3912 this_badness += 2048;
3913 int target_badness = targetm.simd_clone.usable (n);
3914 if (target_badness < 0)
3915 continue;
3916 this_badness += target_badness * 512;
3917 /* FORNOW: Have to add code to add the mask argument. */
3918 if (n->simdclone->inbranch)
3919 continue;
3920 for (i = 0; i < nargs; i++)
3922 switch (n->simdclone->args[i].arg_type)
3924 case SIMD_CLONE_ARG_TYPE_VECTOR:
3925 if (!useless_type_conversion_p
3926 (n->simdclone->args[i].orig_type,
3927 TREE_TYPE (gimple_call_arg (stmt, i))))
3928 i = -1;
3929 else if (arginfo[i].dt == vect_constant_def
3930 || arginfo[i].dt == vect_external_def
3931 || arginfo[i].linear_step)
3932 this_badness += 64;
3933 break;
3934 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3935 if (arginfo[i].dt != vect_constant_def
3936 && arginfo[i].dt != vect_external_def)
3937 i = -1;
3938 break;
3939 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3940 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3941 if (arginfo[i].dt == vect_constant_def
3942 || arginfo[i].dt == vect_external_def
3943 || (arginfo[i].linear_step
3944 != n->simdclone->args[i].linear_step))
3945 i = -1;
3946 break;
3947 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3948 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3949 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3950 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3951 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3952 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3953 /* FORNOW */
3954 i = -1;
3955 break;
3956 case SIMD_CLONE_ARG_TYPE_MASK:
3957 gcc_unreachable ();
3959 if (i == (size_t) -1)
3960 break;
3961 if (n->simdclone->args[i].alignment > arginfo[i].align)
3963 i = -1;
3964 break;
3966 if (arginfo[i].align)
3967 this_badness += (exact_log2 (arginfo[i].align)
3968 - exact_log2 (n->simdclone->args[i].alignment));
3970 if (i == (size_t) -1)
3971 continue;
3972 if (bestn == NULL || this_badness < badness)
3974 bestn = n;
3975 badness = this_badness;
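  /* Rough worked example of the scoring above: with vf == 8, a usable
     unmasked clone with simdlen == 8 scores 0 while a simdlen == 4
     clone scores (exact_log2 (8) - exact_log2 (4)) * 1024 == 1024, so
     the wider clone wins; target penalties are scaled by 512 and a
     vector argument that is really uniform or linear only adds 64.  */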
3979 if (bestn == NULL)
3980 return false;
3982 for (i = 0; i < nargs; i++)
3983 if ((arginfo[i].dt == vect_constant_def
3984 || arginfo[i].dt == vect_external_def)
3985 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3987 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
3988 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
3989 slp_node);
3990 if (arginfo[i].vectype == NULL
3991 || (simd_clone_subparts (arginfo[i].vectype)
3992 > bestn->simdclone->simdlen))
3993 return false;
3996 fndecl = bestn->decl;
3997 nunits = bestn->simdclone->simdlen;
3998 ncopies = vf / nunits;
4000 /* If the function isn't const, only allow it in simd loops where the
4001 user has asserted that at least nunits consecutive iterations can be
4002 performed using SIMD instructions. */
4003 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4004 && gimple_vuse (stmt))
4005 return false;
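  /* For instance, "#pragma omp simd safelen(8)" records 8 in
     loop->safelen (a plain "#pragma omp simd" is treated as an
     effectively unlimited safelen), which is what allows a clone of a
     non-const function (one carrying a virtual use) to be called here,
     provided safelen is at least nunits.  */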
4007 /* Sanity check: make sure that at least one copy of the vectorized stmt
4008 needs to be generated. */
4009 gcc_assert (ncopies >= 1);
4011 if (!vec_stmt) /* transformation not required. */
4013 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4014 for (i = 0; i < nargs; i++)
4015 if ((bestn->simdclone->args[i].arg_type
4016 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4017 || (bestn->simdclone->args[i].arg_type
4018 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4020 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4021 + 1,
4022 true);
4023 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4024 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4025 ? size_type_node : TREE_TYPE (arginfo[i].op);
4026 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4027 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4028 tree sll = arginfo[i].simd_lane_linear
4029 ? boolean_true_node : boolean_false_node;
4030 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4032 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4033 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4034 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4035 dt, slp_node, cost_vec); */
4036 return true;
4039 /* Transform. */
4041 if (dump_enabled_p ())
4042 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4044 /* Handle def. */
4045 scalar_dest = gimple_call_lhs (stmt);
4046 vec_dest = NULL_TREE;
4047 rtype = NULL_TREE;
4048 ratype = NULL_TREE;
4049 if (scalar_dest)
4051 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4052 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4053 if (TREE_CODE (rtype) == ARRAY_TYPE)
4055 ratype = rtype;
4056 rtype = TREE_TYPE (ratype);
4060 auto_vec<vec<tree> > vec_oprnds;
4061 auto_vec<unsigned> vec_oprnds_i;
4062 vec_oprnds.safe_grow_cleared (nargs, true);
4063 vec_oprnds_i.safe_grow_cleared (nargs, true);
4064 for (j = 0; j < ncopies; ++j)
4066 /* Build argument list for the vectorized call. */
4067 if (j == 0)
4068 vargs.create (nargs);
4069 else
4070 vargs.truncate (0);
4072 for (i = 0; i < nargs; i++)
4074 unsigned int k, l, m, o;
4075 tree atype;
4076 op = gimple_call_arg (stmt, i);
4077 switch (bestn->simdclone->args[i].arg_type)
4079 case SIMD_CLONE_ARG_TYPE_VECTOR:
4080 atype = bestn->simdclone->args[i].vector_type;
4081 o = nunits / simd_clone_subparts (atype);
4082 for (m = j * o; m < (j + 1) * o; m++)
4084 if (simd_clone_subparts (atype)
4085 < simd_clone_subparts (arginfo[i].vectype))
4087 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4088 k = (simd_clone_subparts (arginfo[i].vectype)
4089 / simd_clone_subparts (atype));
4090 gcc_assert ((k & (k - 1)) == 0);
4091 if (m == 0)
4093 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4094 ncopies * o / k, op,
4095 &vec_oprnds[i]);
4096 vec_oprnds_i[i] = 0;
4097 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4099 else
4101 vec_oprnd0 = arginfo[i].op;
4102 if ((m & (k - 1)) == 0)
4103 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4105 arginfo[i].op = vec_oprnd0;
4106 vec_oprnd0
4107 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4108 bitsize_int (prec),
4109 bitsize_int ((m & (k - 1)) * prec));
4110 gassign *new_stmt
4111 = gimple_build_assign (make_ssa_name (atype),
4112 vec_oprnd0);
4113 vect_finish_stmt_generation (vinfo, stmt_info,
4114 new_stmt, gsi);
4115 vargs.safe_push (gimple_assign_lhs (new_stmt));
4117 else
4119 k = (simd_clone_subparts (atype)
4120 / simd_clone_subparts (arginfo[i].vectype));
4121 gcc_assert ((k & (k - 1)) == 0);
4122 vec<constructor_elt, va_gc> *ctor_elts;
4123 if (k != 1)
4124 vec_alloc (ctor_elts, k);
4125 else
4126 ctor_elts = NULL;
4127 for (l = 0; l < k; l++)
4129 if (m == 0 && l == 0)
4131 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4132 k * o * ncopies,
4134 &vec_oprnds[i]);
4135 vec_oprnds_i[i] = 0;
4136 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4138 else
4139 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4140 arginfo[i].op = vec_oprnd0;
4141 if (k == 1)
4142 break;
4143 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4144 vec_oprnd0);
4146 if (k == 1)
4147 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4148 atype))
4150 vec_oprnd0
4151 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4152 gassign *new_stmt
4153 = gimple_build_assign (make_ssa_name (atype),
4154 vec_oprnd0);
4155 vect_finish_stmt_generation (vinfo, stmt_info,
4156 new_stmt, gsi);
4157 vargs.safe_push (gimple_assign_lhs (new_stmt));
4159 else
4160 vargs.safe_push (vec_oprnd0);
4161 else
4163 vec_oprnd0 = build_constructor (atype, ctor_elts);
4164 gassign *new_stmt
4165 = gimple_build_assign (make_ssa_name (atype),
4166 vec_oprnd0);
4167 vect_finish_stmt_generation (vinfo, stmt_info,
4168 new_stmt, gsi);
4169 vargs.safe_push (gimple_assign_lhs (new_stmt));
4173 break;
4174 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4175 vargs.safe_push (op);
4176 break;
4177 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4178 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4179 if (j == 0)
4181 gimple_seq stmts;
4182 arginfo[i].op
4183 = force_gimple_operand (unshare_expr (arginfo[i].op),
4184 &stmts, true, NULL_TREE);
4185 if (stmts != NULL)
4187 basic_block new_bb;
4188 edge pe = loop_preheader_edge (loop);
4189 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4190 gcc_assert (!new_bb);
4192 if (arginfo[i].simd_lane_linear)
4194 vargs.safe_push (arginfo[i].op);
4195 break;
4197 tree phi_res = copy_ssa_name (op);
4198 gphi *new_phi = create_phi_node (phi_res, loop->header);
4199 add_phi_arg (new_phi, arginfo[i].op,
4200 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4201 enum tree_code code
4202 = POINTER_TYPE_P (TREE_TYPE (op))
4203 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4204 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4205 ? sizetype : TREE_TYPE (op);
4206 widest_int cst
4207 = wi::mul (bestn->simdclone->args[i].linear_step,
4208 ncopies * nunits);
4209 tree tcst = wide_int_to_tree (type, cst);
4210 tree phi_arg = copy_ssa_name (op);
4211 gassign *new_stmt
4212 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4213 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4214 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4215 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4216 UNKNOWN_LOCATION);
4217 arginfo[i].op = phi_res;
4218 vargs.safe_push (phi_res);
4220 else
4222 enum tree_code code
4223 = POINTER_TYPE_P (TREE_TYPE (op))
4224 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4225 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4226 ? sizetype : TREE_TYPE (op);
4227 widest_int cst
4228 = wi::mul (bestn->simdclone->args[i].linear_step,
4229 j * nunits);
4230 tree tcst = wide_int_to_tree (type, cst);
4231 new_temp = make_ssa_name (TREE_TYPE (op));
4232 gassign *new_stmt
4233 = gimple_build_assign (new_temp, code,
4234 arginfo[i].op, tcst);
4235 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4236 vargs.safe_push (new_temp);
4238 break;
4239 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4240 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4241 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4242 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4243 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4244 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4245 default:
4246 gcc_unreachable ();
4250 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4251 if (vec_dest)
4253 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4254 if (ratype)
4255 new_temp = create_tmp_var (ratype);
4256 else if (useless_type_conversion_p (vectype, rtype))
4257 new_temp = make_ssa_name (vec_dest, new_call);
4258 else
4259 new_temp = make_ssa_name (rtype, new_call);
4260 gimple_call_set_lhs (new_call, new_temp);
4262 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4263 gimple *new_stmt = new_call;
4265 if (vec_dest)
4267 if (simd_clone_subparts (vectype) < nunits)
4269 unsigned int k, l;
4270 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4271 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4272 k = nunits / simd_clone_subparts (vectype);
4273 gcc_assert ((k & (k - 1)) == 0);
4274 for (l = 0; l < k; l++)
4276 tree t;
4277 if (ratype)
4279 t = build_fold_addr_expr (new_temp);
4280 t = build2 (MEM_REF, vectype, t,
4281 build_int_cst (TREE_TYPE (t), l * bytes));
4283 else
4284 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4285 bitsize_int (prec), bitsize_int (l * prec));
4286 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4287 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4289 if (j == 0 && l == 0)
4290 *vec_stmt = new_stmt;
4291 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4294 if (ratype)
4295 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4296 continue;
4298 else if (simd_clone_subparts (vectype) > nunits)
4300 unsigned int k = (simd_clone_subparts (vectype)
4301 / simd_clone_subparts (rtype));
4302 gcc_assert ((k & (k - 1)) == 0);
4303 if ((j & (k - 1)) == 0)
4304 vec_alloc (ret_ctor_elts, k);
4305 if (ratype)
4307 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4308 for (m = 0; m < o; m++)
4310 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4311 size_int (m), NULL_TREE, NULL_TREE);
4312 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4313 tem);
4314 vect_finish_stmt_generation (vinfo, stmt_info,
4315 new_stmt, gsi);
4316 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4317 gimple_assign_lhs (new_stmt));
4319 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4321 else
4322 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4323 if ((j & (k - 1)) != k - 1)
4324 continue;
4325 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4326 new_stmt
4327 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4328 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4330 if ((unsigned) j == k - 1)
4331 *vec_stmt = new_stmt;
4332 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4333 continue;
4335 else if (ratype)
4337 tree t = build_fold_addr_expr (new_temp);
4338 t = build2 (MEM_REF, vectype, t,
4339 build_int_cst (TREE_TYPE (t), 0));
4340 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4341 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4342 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4344 else if (!useless_type_conversion_p (vectype, rtype))
4346 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4347 new_stmt
4348 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4349 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4353 if (j == 0)
4354 *vec_stmt = new_stmt;
4355 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4358 for (i = 0; i < nargs; ++i)
4360 vec<tree> oprndsi = vec_oprnds[i];
4361 oprndsi.release ();
4363 vargs.release ();
4365 /* The call in STMT might prevent it from being removed in DCE.
4366 We cannot remove it here, however, because of the way the SSA name
4367 it defines is mapped to the new definition. So just replace the
4368 rhs of the statement with something harmless. */
4370 if (slp_node)
4371 return true;
4373 gimple *new_stmt;
4374 if (scalar_dest)
4376 type = TREE_TYPE (scalar_dest);
4377 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4378 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4380 else
4381 new_stmt = gimple_build_nop ();
4382 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4383 unlink_stmt_vdef (stmt);
4385 return true;
4389 /* Function vect_gen_widened_results_half
4391 Create a vector stmt whose code, number of arguments, and result
4392 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4393 VEC_OPRND0 and VEC_OPRND1 (the latter is ignored unless OP_TYPE
4394 is binary_op). The new vector stmt is to be inserted at GSI.
4396 STMT_INFO is the original scalar stmt that we are vectorizing. */
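/* E.g. when widening V8HI operands to V4SI results, the caller invokes
   this twice per input vector, once with the "lo" variant of the tree
   code and once with the "hi" variant (such as VEC_UNPACK_LO_EXPR and
   VEC_UNPACK_HI_EXPR for a conversion), each call producing one V4SI
   half of the widened result.  */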
4398 static gimple *
4399 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4400 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4401 tree vec_dest, gimple_stmt_iterator *gsi,
4402 stmt_vec_info stmt_info)
4404 gimple *new_stmt;
4405 tree new_temp;
4407 /* Generate half of the widened result: */
4408 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4409 if (op_type != binary_op)
4410 vec_oprnd1 = NULL;
4411 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4412 new_temp = make_ssa_name (vec_dest, new_stmt);
4413 gimple_assign_set_lhs (new_stmt, new_temp);
4414 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4416 return new_stmt;
4420 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4421 For multi-step conversions store the resulting vectors and call the function
4422 recursively. */
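/* E.g. a two-step demotion from V4SI to V16QI first packs pairs of
   V4SI vectors into V8HI vectors and then pairs of those into V16QI;
   the recursive invocation below uses VEC_PACK_TRUNC_EXPR for the
   later steps.  */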
4424 static void
4425 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4426 int multi_step_cvt,
4427 stmt_vec_info stmt_info,
4428 vec<tree> vec_dsts,
4429 gimple_stmt_iterator *gsi,
4430 slp_tree slp_node, enum tree_code code)
4432 unsigned int i;
4433 tree vop0, vop1, new_tmp, vec_dest;
4435 vec_dest = vec_dsts.pop ();
4437 for (i = 0; i < vec_oprnds->length (); i += 2)
4439 /* Create demotion operation. */
4440 vop0 = (*vec_oprnds)[i];
4441 vop1 = (*vec_oprnds)[i + 1];
4442 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4443 new_tmp = make_ssa_name (vec_dest, new_stmt);
4444 gimple_assign_set_lhs (new_stmt, new_tmp);
4445 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4447 if (multi_step_cvt)
4448 /* Store the resulting vector for next recursive call. */
4449 (*vec_oprnds)[i/2] = new_tmp;
4450 else
4452 /* This is the last step of the conversion sequence. Store the
4453 vectors in SLP_NODE or in the vector info of the scalar statement
4454 (or in the STMT_VINFO_RELATED_STMT chain). */
4455 if (slp_node)
4456 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4457 else
4458 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4462 /* For multi-step demotion operations we first generate demotion operations
4463 from the source type to the intermediate types, and then combine the
4464 results (stored in VEC_OPRNDS) with a further demotion operation to the
4465 destination type. */
4466 if (multi_step_cvt)
4468 /* At each level of recursion we have half of the operands we had at the
4469 previous level. */
4470 vec_oprnds->truncate ((i+1)/2);
4471 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4472 multi_step_cvt - 1,
4473 stmt_info, vec_dsts, gsi,
4474 slp_node, VEC_PACK_TRUNC_EXPR);
4477 vec_dsts.quick_push (vec_dest);
4481 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4482 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4483 STMT_INFO. For multi-step conversions store the resulting vectors and
4484 call the function recursively. */
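/* E.g. for a widening multiplication of V8HI operands into V4SI
   results, each input vector (or pair of input vectors for a binary
   operation) yields two output vectors via a lo/hi pair of codes such
   as VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR; for multi-step
   conversions the caller re-invokes this on the stored intermediate
   results.  */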
4486 static void
4487 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4488 vec<tree> *vec_oprnds0,
4489 vec<tree> *vec_oprnds1,
4490 stmt_vec_info stmt_info, tree vec_dest,
4491 gimple_stmt_iterator *gsi,
4492 enum tree_code code1,
4493 enum tree_code code2, int op_type)
4495 int i;
4496 tree vop0, vop1, new_tmp1, new_tmp2;
4497 gimple *new_stmt1, *new_stmt2;
4498 vec<tree> vec_tmp = vNULL;
4500 vec_tmp.create (vec_oprnds0->length () * 2);
4501 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4503 if (op_type == binary_op)
4504 vop1 = (*vec_oprnds1)[i];
4505 else
4506 vop1 = NULL_TREE;
4508 /* Generate the two halves of the promotion operation. */
4509 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4510 op_type, vec_dest, gsi,
4511 stmt_info);
4512 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4513 op_type, vec_dest, gsi,
4514 stmt_info);
4515 if (is_gimple_call (new_stmt1))
4517 new_tmp1 = gimple_call_lhs (new_stmt1);
4518 new_tmp2 = gimple_call_lhs (new_stmt2);
4520 else
4522 new_tmp1 = gimple_assign_lhs (new_stmt1);
4523 new_tmp2 = gimple_assign_lhs (new_stmt2);
4526 /* Store the results for the next step. */
4527 vec_tmp.quick_push (new_tmp1);
4528 vec_tmp.quick_push (new_tmp2);
4531 vec_oprnds0->release ();
4532 *vec_oprnds0 = vec_tmp;
4536 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4537 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4538 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4539 Return true if STMT_INFO is vectorizable in this way. */
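/* Roughly, the conversion is classified below by the ratio of input to
   output vector elements: e.g. int <-> float of the same size is NONE,
   short -> int is WIDEN (each input vector yields two output vectors)
   and int -> short is NARROW (pairs of input vectors are packed into
   one).  */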
4541 static bool
4542 vectorizable_conversion (vec_info *vinfo,
4543 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4544 gimple **vec_stmt, slp_tree slp_node,
4545 stmt_vector_for_cost *cost_vec)
4547 tree vec_dest;
4548 tree scalar_dest;
4549 tree op0, op1 = NULL_TREE;
4550 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4551 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4552 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4553 tree new_temp;
4554 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4555 int ndts = 2;
4556 poly_uint64 nunits_in;
4557 poly_uint64 nunits_out;
4558 tree vectype_out, vectype_in;
4559 int ncopies, i;
4560 tree lhs_type, rhs_type;
4561 enum { NARROW, NONE, WIDEN } modifier;
4562 vec<tree> vec_oprnds0 = vNULL;
4563 vec<tree> vec_oprnds1 = vNULL;
4564 tree vop0;
4565 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4566 int multi_step_cvt = 0;
4567 vec<tree> interm_types = vNULL;
4568 tree intermediate_type, cvt_type = NULL_TREE;
4569 int op_type;
4570 unsigned short fltsz;
4572 /* Is STMT a vectorizable conversion? */
4574 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4575 return false;
4577 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4578 && ! vec_stmt)
4579 return false;
4581 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4582 if (!stmt)
4583 return false;
4585 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4586 return false;
4588 code = gimple_assign_rhs_code (stmt);
4589 if (!CONVERT_EXPR_CODE_P (code)
4590 && code != FIX_TRUNC_EXPR
4591 && code != FLOAT_EXPR
4592 && code != WIDEN_MULT_EXPR
4593 && code != WIDEN_LSHIFT_EXPR)
4594 return false;
4596 op_type = TREE_CODE_LENGTH (code);
4598 /* Check types of lhs and rhs. */
4599 scalar_dest = gimple_assign_lhs (stmt);
4600 lhs_type = TREE_TYPE (scalar_dest);
4601 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4603 /* Check the operands of the operation. */
4604 slp_tree slp_op0, slp_op1 = NULL;
4605 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4606 0, &op0, &slp_op0, &dt[0], &vectype_in))
4608 if (dump_enabled_p ())
4609 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4610 "use not simple.\n");
4611 return false;
4614 rhs_type = TREE_TYPE (op0);
4615 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4616 && !((INTEGRAL_TYPE_P (lhs_type)
4617 && INTEGRAL_TYPE_P (rhs_type))
4618 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4619 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4620 return false;
4622 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4623 && ((INTEGRAL_TYPE_P (lhs_type)
4624 && !type_has_mode_precision_p (lhs_type))
4625 || (INTEGRAL_TYPE_P (rhs_type)
4626 && !type_has_mode_precision_p (rhs_type))))
4628 if (dump_enabled_p ())
4629 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4630 "type conversion to/from bit-precision unsupported."
4631 "\n");
4632 return false;
4635 if (op_type == binary_op)
4637 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4639 op1 = gimple_assign_rhs2 (stmt);
4640 tree vectype1_in;
4641 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4642 &op1, &slp_op1, &dt[1], &vectype1_in))
4644 if (dump_enabled_p ())
4645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4646 "use not simple.\n");
4647 return false;
4649 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4650 OP1. */
4651 if (!vectype_in)
4652 vectype_in = vectype1_in;
4655 /* If op0 is an external or constant def, infer the vector type
4656 from the scalar type. */
4657 if (!vectype_in)
4658 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4659 if (vec_stmt)
4660 gcc_assert (vectype_in);
4661 if (!vectype_in)
4663 if (dump_enabled_p ())
4664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4665 "no vectype for scalar type %T\n", rhs_type);
4667 return false;
4670 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4671 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4673 if (dump_enabled_p ())
4674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4675 "can't convert between boolean and non "
4676 "boolean vectors %T\n", rhs_type);
4678 return false;
4681 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4682 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4683 if (known_eq (nunits_out, nunits_in))
4684 modifier = NONE;
4685 else if (multiple_p (nunits_out, nunits_in))
4686 modifier = NARROW;
4687 else
4689 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4690 modifier = WIDEN;
4693 /* Multiple types in SLP are handled by creating the appropriate number of
4694 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4695 case of SLP. */
4696 if (slp_node)
4697 ncopies = 1;
4698 else if (modifier == NARROW)
4699 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4700 else
4701 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4703 /* Sanity check: make sure that at least one copy of the vectorized stmt
4704 needs to be generated. */
4705 gcc_assert (ncopies >= 1);
4707 bool found_mode = false;
4708 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4709 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4710 opt_scalar_mode rhs_mode_iter;
4712 /* Supportable by target? */
4713 switch (modifier)
4715 case NONE:
4716 if (code != FIX_TRUNC_EXPR
4717 && code != FLOAT_EXPR
4718 && !CONVERT_EXPR_CODE_P (code))
4719 return false;
4720 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4721 break;
4722 /* FALLTHRU */
4723 unsupported:
4724 if (dump_enabled_p ())
4725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4726 "conversion not supported by target.\n");
4727 return false;
4729 case WIDEN:
4730 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4731 vectype_in, &code1, &code2,
4732 &multi_step_cvt, &interm_types))
4734 /* Binary widening operation can only be supported directly by the
4735 architecture. */
4736 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4737 break;
4740 if (code != FLOAT_EXPR
4741 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4742 goto unsupported;
4744 fltsz = GET_MODE_SIZE (lhs_mode);
4745 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4747 rhs_mode = rhs_mode_iter.require ();
4748 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4749 break;
4751 cvt_type
4752 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4753 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4754 if (cvt_type == NULL_TREE)
4755 goto unsupported;
4757 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4759 if (!supportable_convert_operation (code, vectype_out,
4760 cvt_type, &codecvt1))
4761 goto unsupported;
4763 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4764 vectype_out, cvt_type,
4765 &codecvt1, &codecvt2,
4766 &multi_step_cvt,
4767 &interm_types))
4768 continue;
4769 else
4770 gcc_assert (multi_step_cvt == 0);
4772 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4773 cvt_type,
4774 vectype_in, &code1, &code2,
4775 &multi_step_cvt, &interm_types))
4777 found_mode = true;
4778 break;
4782 if (!found_mode)
4783 goto unsupported;
4785 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4786 codecvt2 = ERROR_MARK;
4787 else
4789 multi_step_cvt++;
4790 interm_types.safe_push (cvt_type);
4791 cvt_type = NULL_TREE;
4793 break;
4795 case NARROW:
4796 gcc_assert (op_type == unary_op);
4797 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4798 &code1, &multi_step_cvt,
4799 &interm_types))
4800 break;
4802 if (code != FIX_TRUNC_EXPR
4803 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4804 goto unsupported;
4806 cvt_type
4807 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4808 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4809 if (cvt_type == NULL_TREE)
4810 goto unsupported;
4811 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4812 &codecvt1))
4813 goto unsupported;
4814 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4815 &code1, &multi_step_cvt,
4816 &interm_types))
4817 break;
4818 goto unsupported;
4820 default:
4821 gcc_unreachable ();
4824 if (!vec_stmt) /* transformation not required. */
4826 if (slp_node
4827 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4828 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4830 if (dump_enabled_p ())
4831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4832 "incompatible vector types for invariants\n");
4833 return false;
4835 DUMP_VECT_SCOPE ("vectorizable_conversion");
4836 if (modifier == NONE)
4838 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4839 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4840 cost_vec);
4842 else if (modifier == NARROW)
4844 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4845 /* The final packing step produces one vector result per copy. */
4846 unsigned int nvectors
4847 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4848 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4849 multi_step_cvt, cost_vec);
4851 else
4853 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4854 /* The initial unpacking step produces two vector results
4855 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4856 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4857 unsigned int nvectors
4858 = (slp_node
4859 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4860 : ncopies * 2);
4861 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4862 multi_step_cvt, cost_vec);
4864 interm_types.release ();
4865 return true;
4868 /* Transform. */
4869 if (dump_enabled_p ())
4870 dump_printf_loc (MSG_NOTE, vect_location,
4871 "transform conversion. ncopies = %d.\n", ncopies);
4873 if (op_type == binary_op)
4875 if (CONSTANT_CLASS_P (op0))
4876 op0 = fold_convert (TREE_TYPE (op1), op0);
4877 else if (CONSTANT_CLASS_P (op1))
4878 op1 = fold_convert (TREE_TYPE (op0), op1);
4881 /* In case of multi-step conversion, we first generate conversion operations
4882 to the intermediate types, and then from those types to the final one.
4883 We create vector destinations for the intermediate types (TYPES) received
4884 from supportable_*_operation, and store them in the correct order
4885 for future use in vect_create_vectorized_*_stmts (). */
4886 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4887 vec_dest = vect_create_destination_var (scalar_dest,
4888 (cvt_type && modifier == WIDEN)
4889 ? cvt_type : vectype_out);
4890 vec_dsts.quick_push (vec_dest);
4892 if (multi_step_cvt)
4894 for (i = interm_types.length () - 1;
4895 interm_types.iterate (i, &intermediate_type); i--)
4897 vec_dest = vect_create_destination_var (scalar_dest,
4898 intermediate_type);
4899 vec_dsts.quick_push (vec_dest);
4903 if (cvt_type)
4904 vec_dest = vect_create_destination_var (scalar_dest,
4905 modifier == WIDEN
4906 ? vectype_out : cvt_type);
4908 int ninputs = 1;
4909 if (!slp_node)
4911 if (modifier == WIDEN)
4913 else if (modifier == NARROW)
4915 if (multi_step_cvt)
4916 ninputs = vect_pow2 (multi_step_cvt);
4917 ninputs *= 2;
4921 switch (modifier)
4923 case NONE:
4924 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
4925 op0, &vec_oprnds0);
4926 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4928 /* Arguments are ready, create the new vector stmt. */
4929 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4930 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4931 new_temp = make_ssa_name (vec_dest, new_stmt);
4932 gimple_assign_set_lhs (new_stmt, new_temp);
4933 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4935 if (slp_node)
4936 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4937 else
4938 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4940 break;
4942 case WIDEN:
4943 /* In case the vectorization factor (VF) is bigger than the number
4944 of elements that we can fit in a vectype (nunits), we have to
4945 generate more than one vector stmt, i.e. we need to "unroll"
4946 the vector stmt by a factor VF/nunits. */
4947 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4948 op0, &vec_oprnds0,
4949 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
4950 &vec_oprnds1);
4951 if (code == WIDEN_LSHIFT_EXPR)
4953 vec_oprnds1.create (ncopies * ninputs);
4954 for (i = 0; i < ncopies * ninputs; ++i)
4955 vec_oprnds1.quick_push (op1);
4957 /* Arguments are ready. Create the new vector stmts. */
4958 for (i = multi_step_cvt; i >= 0; i--)
4960 tree this_dest = vec_dsts[i];
4961 enum tree_code c1 = code1, c2 = code2;
4962 if (i == 0 && codecvt2 != ERROR_MARK)
4964 c1 = codecvt1;
4965 c2 = codecvt2;
4967 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
4968 &vec_oprnds1, stmt_info,
4969 this_dest, gsi,
4970 c1, c2, op_type);
4973 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4975 gimple *new_stmt;
4976 if (cvt_type)
4978 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4979 new_temp = make_ssa_name (vec_dest);
4980 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
4981 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4983 else
4984 new_stmt = SSA_NAME_DEF_STMT (vop0);
4986 if (slp_node)
4987 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4988 else
4989 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4991 break;
4993 case NARROW:
4994 /* In case the vectorization factor (VF) is bigger than the number
4995 of elements that we can fit in a vectype (nunits), we have to
4996 generate more than one vector stmt, i.e. we need to "unroll"
4997 the vector stmt by a factor VF/nunits. */
4998 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4999 op0, &vec_oprnds0);
5000 /* Arguments are ready. Create the new vector stmts. */
5001 if (cvt_type)
5002 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5004 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5005 new_temp = make_ssa_name (vec_dest);
5006 gassign *new_stmt
5007 = gimple_build_assign (new_temp, codecvt1, vop0);
5008 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5009 vec_oprnds0[i] = new_temp;
5012 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5013 multi_step_cvt,
5014 stmt_info, vec_dsts, gsi,
5015 slp_node, code1);
5016 break;
5018 if (!slp_node)
5019 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5021 vec_oprnds0.release ();
5022 vec_oprnds1.release ();
5023 interm_types.release ();
5025 return true;
5028 /* Return true if we can assume from the scalar form of STMT_INFO that
5029 neither the scalar nor the vector forms will generate code. STMT_INFO
5030 is known not to involve a data reference. */
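/* E.g. a cast between int and unsigned int (same precision), a
   VIEW_CONVERT_EXPR, or a plain SSA_NAME copy is such a no-op: neither
   the scalar nor the vector form needs any code.  */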
5032 bool
5033 vect_nop_conversion_p (stmt_vec_info stmt_info)
5035 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5036 if (!stmt)
5037 return false;
5039 tree lhs = gimple_assign_lhs (stmt);
5040 tree_code code = gimple_assign_rhs_code (stmt);
5041 tree rhs = gimple_assign_rhs1 (stmt);
5043 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5044 return true;
5046 if (CONVERT_EXPR_CODE_P (code))
5047 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5049 return false;
5052 /* Function vectorizable_assignment.
5054 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5055 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5056 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5057 Return true if STMT_INFO is vectorizable in this way. */
5059 static bool
5060 vectorizable_assignment (vec_info *vinfo,
5061 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5062 gimple **vec_stmt, slp_tree slp_node,
5063 stmt_vector_for_cost *cost_vec)
5065 tree vec_dest;
5066 tree scalar_dest;
5067 tree op;
5068 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5069 tree new_temp;
5070 enum vect_def_type dt[1] = {vect_unknown_def_type};
5071 int ndts = 1;
5072 int ncopies;
5073 int i;
5074 vec<tree> vec_oprnds = vNULL;
5075 tree vop;
5076 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5077 enum tree_code code;
5078 tree vectype_in;
5080 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5081 return false;
5083 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5084 && ! vec_stmt)
5085 return false;
5087 /* Is vectorizable assignment? */
5088 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5089 if (!stmt)
5090 return false;
5092 scalar_dest = gimple_assign_lhs (stmt);
5093 if (TREE_CODE (scalar_dest) != SSA_NAME)
5094 return false;
5096 if (STMT_VINFO_DATA_REF (stmt_info))
5097 return false;
5099 code = gimple_assign_rhs_code (stmt);
5100 if (!(gimple_assign_single_p (stmt)
5101 || code == PAREN_EXPR
5102 || CONVERT_EXPR_CODE_P (code)))
5103 return false;
5105 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5106 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5108 /* Multiple types in SLP are handled by creating the appropriate number of
5109 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5110 case of SLP. */
5111 if (slp_node)
5112 ncopies = 1;
5113 else
5114 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5116 gcc_assert (ncopies >= 1);
5118 slp_tree slp_op;
5119 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5120 &dt[0], &vectype_in))
5122 if (dump_enabled_p ())
5123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5124 "use not simple.\n");
5125 return false;
5127 if (!vectype_in)
5128 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5130 /* We can handle NOP_EXPR conversions that do not change the number
5131 of elements or the vector size. */
5132 if ((CONVERT_EXPR_CODE_P (code)
5133 || code == VIEW_CONVERT_EXPR)
5134 && (!vectype_in
5135 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5136 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5137 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5138 return false;
5140 /* We do not handle bit-precision changes. */
5141 if ((CONVERT_EXPR_CODE_P (code)
5142 || code == VIEW_CONVERT_EXPR)
5143 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5144 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5145 || !type_has_mode_precision_p (TREE_TYPE (op)))
5146 /* But a conversion that does not change the bit-pattern is ok. */
5147 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5148 > TYPE_PRECISION (TREE_TYPE (op)))
5149 && TYPE_UNSIGNED (TREE_TYPE (op)))
5150 /* Conversion between boolean types of different sizes is
5151 a simple assignment in case their vectypes are the same
5152 boolean vector type. */
5153 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5154 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5156 if (dump_enabled_p ())
5157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5158 "type conversion to/from bit-precision "
5159 "unsupported.\n");
5160 return false;
5163 if (!vec_stmt) /* transformation not required. */
5165 if (slp_node
5166 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5168 if (dump_enabled_p ())
5169 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5170 "incompatible vector types for invariants\n");
5171 return false;
5173 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5174 DUMP_VECT_SCOPE ("vectorizable_assignment");
5175 if (!vect_nop_conversion_p (stmt_info))
5176 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5177 cost_vec);
5178 return true;
5181 /* Transform. */
5182 if (dump_enabled_p ())
5183 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5185 /* Handle def. */
5186 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5188 /* Handle use. */
5189 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5191 /* Arguments are ready. Create the new vector stmt. */
5192 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5194 if (CONVERT_EXPR_CODE_P (code)
5195 || code == VIEW_CONVERT_EXPR)
5196 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5197 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5198 new_temp = make_ssa_name (vec_dest, new_stmt);
5199 gimple_assign_set_lhs (new_stmt, new_temp);
5200 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5201 if (slp_node)
5202 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5203 else
5204 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5206 if (!slp_node)
5207 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5209 vec_oprnds.release ();
5210 return true;
5214 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5215 either as shift by a scalar or by a vector. */
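/* The vector-shifted-by-scalar optab (all elements shifted by the same
   amount) is tried first and the vector-shifted-by-vector optab
   (per-element amounts) second, so a target providing either form is
   enough.  */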
5217 bool
5218 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5221 machine_mode vec_mode;
5222 optab optab;
5223 int icode;
5224 tree vectype;
5226 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5227 if (!vectype)
5228 return false;
5230 optab = optab_for_tree_code (code, vectype, optab_scalar);
5231 if (!optab
5232 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5234 optab = optab_for_tree_code (code, vectype, optab_vector);
5235 if (!optab
5236 || (optab_handler (optab, TYPE_MODE (vectype))
5237 == CODE_FOR_nothing))
5238 return false;
5241 vec_mode = TYPE_MODE (vectype);
5242 icode = (int) optab_handler (optab, vec_mode);
5243 if (icode == CODE_FOR_nothing)
5244 return false;
5246 return true;
5250 /* Function vectorizable_shift.
5252 Check if STMT_INFO performs a shift operation that can be vectorized.
5253 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5254 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5255 Return true if STMT_INFO is vectorizable in this way. */
5257 static bool
5258 vectorizable_shift (vec_info *vinfo,
5259 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5260 gimple **vec_stmt, slp_tree slp_node,
5261 stmt_vector_for_cost *cost_vec)
5263 tree vec_dest;
5264 tree scalar_dest;
5265 tree op0, op1 = NULL;
5266 tree vec_oprnd1 = NULL_TREE;
5267 tree vectype;
5268 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5269 enum tree_code code;
5270 machine_mode vec_mode;
5271 tree new_temp;
5272 optab optab;
5273 int icode;
5274 machine_mode optab_op2_mode;
5275 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5276 int ndts = 2;
5277 poly_uint64 nunits_in;
5278 poly_uint64 nunits_out;
5279 tree vectype_out;
5280 tree op1_vectype;
5281 int ncopies;
5282 int i;
5283 vec<tree> vec_oprnds0 = vNULL;
5284 vec<tree> vec_oprnds1 = vNULL;
5285 tree vop0, vop1;
5286 unsigned int k;
5287 bool scalar_shift_arg = true;
5288 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5289 bool incompatible_op1_vectype_p = false;
5291 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5292 return false;
5294 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5295 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5296 && ! vec_stmt)
5297 return false;
5299 /* Is STMT a vectorizable binary/unary operation? */
5300 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5301 if (!stmt)
5302 return false;
5304 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5305 return false;
5307 code = gimple_assign_rhs_code (stmt);
5309 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5310 || code == RROTATE_EXPR))
5311 return false;
5313 scalar_dest = gimple_assign_lhs (stmt);
5314 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5315 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5317 if (dump_enabled_p ())
5318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5319 "bit-precision shifts not supported.\n");
5320 return false;
5323 slp_tree slp_op0;
5324 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5325 0, &op0, &slp_op0, &dt[0], &vectype))
5327 if (dump_enabled_p ())
5328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5329 "use not simple.\n");
5330 return false;
5332 /* If op0 is an external or constant def, infer the vector type
5333 from the scalar type. */
5334 if (!vectype)
5335 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5336 if (vec_stmt)
5337 gcc_assert (vectype);
5338 if (!vectype)
5340 if (dump_enabled_p ())
5341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5342 "no vectype for scalar type\n");
5343 return false;
5346 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5347 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5348 if (maybe_ne (nunits_out, nunits_in))
5349 return false;
5351 stmt_vec_info op1_def_stmt_info;
5352 slp_tree slp_op1;
5353 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5354 &dt[1], &op1_vectype, &op1_def_stmt_info))
5356 if (dump_enabled_p ())
5357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5358 "use not simple.\n");
5359 return false;
5362 /* Multiple types in SLP are handled by creating the appropriate number of
5363 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5364 case of SLP. */
5365 if (slp_node)
5366 ncopies = 1;
5367 else
5368 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5370 gcc_assert (ncopies >= 1);
5372 /* Determine whether the shift amount is a vector, or scalar. If the
5373 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5375 if ((dt[1] == vect_internal_def
5376 || dt[1] == vect_induction_def
5377 || dt[1] == vect_nested_cycle)
5378 && !slp_node)
5379 scalar_shift_arg = false;
5380 else if (dt[1] == vect_constant_def
5381 || dt[1] == vect_external_def
5382 || dt[1] == vect_internal_def)
5384 /* In SLP, we need to check whether the shift count is the same
5385 for all scalar stmts; in loops, if it is a constant or
5386 invariant, it is always a scalar shift. */
5387 if (slp_node)
5389 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5390 stmt_vec_info slpstmt_info;
5392 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5394 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5395 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5396 scalar_shift_arg = false;
5399 /* For internal SLP defs we have to make sure we see scalar stmts
5400 for all vector elements.
5401 ??? For different vectors we could resort to a different
5402 scalar shift operand but code-generation below simply always
5403 takes the first. */
5404 if (dt[1] == vect_internal_def
5405 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5406 stmts.length ()))
5407 scalar_shift_arg = false;
5410 /* If the shift amount is computed by a pattern stmt we cannot
5411 use the scalar amount directly thus give up and use a vector
5412 shift. */
5413 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5414 scalar_shift_arg = false;
5416 else
5418 if (dump_enabled_p ())
5419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5420 "operand mode requires invariant argument.\n");
5421 return false;
5424 /* Vector shifted by vector. */
5425 bool was_scalar_shift_arg = scalar_shift_arg;
5426 if (!scalar_shift_arg)
5428 optab = optab_for_tree_code (code, vectype, optab_vector);
5429 if (dump_enabled_p ())
5430 dump_printf_loc (MSG_NOTE, vect_location,
5431 "vector/vector shift/rotate found.\n");
5433 if (!op1_vectype)
5434 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5435 slp_op1);
5436 incompatible_op1_vectype_p
5437 = (op1_vectype == NULL_TREE
5438 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5439 TYPE_VECTOR_SUBPARTS (vectype))
5440 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5441 if (incompatible_op1_vectype_p
5442 && (!slp_node
5443 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5444 || slp_op1->refcnt != 1))
5446 if (dump_enabled_p ())
5447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5448 "unusable type for last operand in"
5449 " vector/vector shift/rotate.\n");
5450 return false;
5453 /* See if the machine has a vector shifted by scalar insn and if not
5454 then see if it has a vector shifted by vector insn. */
5455 else
5457 optab = optab_for_tree_code (code, vectype, optab_scalar);
5458 if (optab
5459 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5461 if (dump_enabled_p ())
5462 dump_printf_loc (MSG_NOTE, vect_location,
5463 "vector/scalar shift/rotate found.\n");
5465 else
5467 optab = optab_for_tree_code (code, vectype, optab_vector);
5468 if (optab
5469 && (optab_handler (optab, TYPE_MODE (vectype))
5470 != CODE_FOR_nothing))
5472 scalar_shift_arg = false;
5474 if (dump_enabled_p ())
5475 dump_printf_loc (MSG_NOTE, vect_location,
5476 "vector/vector shift/rotate found.\n");
5478 if (!op1_vectype)
5479 op1_vectype = get_vectype_for_scalar_type (vinfo,
5480 TREE_TYPE (op1),
5481 slp_op1);
5483 /* Unlike the other binary operators, shifts/rotates have an
5484 int rhs rather than an rhs of the same type as the lhs,
5485 so make sure the scalar has the right type when we are
5486 dealing with vectors of long long/long/short/char.
5487 incompatible_op1_vectype_p
5488 = (!op1_vectype
5489 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5490 TREE_TYPE (op1)));
5491 if (incompatible_op1_vectype_p
5492 && dt[1] == vect_internal_def)
5494 if (dump_enabled_p ())
5495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5496 "unusable type for last operand in"
5497 " vector/vector shift/rotate.\n");
5498 return false;
5504 /* Supportable by target? */
5505 if (!optab)
5507 if (dump_enabled_p ())
5508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5509 "no optab.\n");
5510 return false;
5512 vec_mode = TYPE_MODE (vectype);
5513 icode = (int) optab_handler (optab, vec_mode);
5514 if (icode == CODE_FOR_nothing)
5516 if (dump_enabled_p ())
5517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5518 "op not supported by target.\n");
5519 /* Check only during analysis. */
5520 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5521 || (!vec_stmt
5522 && !vect_worthwhile_without_simd_p (vinfo, code)))
5523 return false;
5524 if (dump_enabled_p ())
5525 dump_printf_loc (MSG_NOTE, vect_location,
5526 "proceeding using word mode.\n");
5529 /* Worthwhile without SIMD support? Check only during analysis. */
5530 if (!vec_stmt
5531 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5532 && !vect_worthwhile_without_simd_p (vinfo, code))
5534 if (dump_enabled_p ())
5535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5536 "not worthwhile without SIMD support.\n");
5537 return false;
5540 if (!vec_stmt) /* transformation not required. */
5542 if (slp_node
5543 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5544 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5545 && (!incompatible_op1_vectype_p
5546 || dt[1] == vect_constant_def)
5547 && !vect_maybe_update_slp_op_vectype
5548 (slp_op1,
5549 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5551 if (dump_enabled_p ())
5552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5553 "incompatible vector types for invariants\n");
5554 return false;
5556 /* Now adjust the constant shift amount in place. */
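/* E.g. for a V16QI shift the shift amount 3 of type int is folded to
   the QImode element type, so the invariant SLP operand can be built
   with the vector element type.  */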
5557 if (slp_node
5558 && incompatible_op1_vectype_p
5559 && dt[1] == vect_constant_def)
5561 for (unsigned i = 0;
5562 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5564 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5565 = fold_convert (TREE_TYPE (vectype),
5566 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5567 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5568 == INTEGER_CST));
5571 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5572 DUMP_VECT_SCOPE ("vectorizable_shift");
5573 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5574 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5575 return true;
5578 /* Transform. */
5580 if (dump_enabled_p ())
5581 dump_printf_loc (MSG_NOTE, vect_location,
5582 "transform binary/unary operation.\n");
5584 if (incompatible_op1_vectype_p && !slp_node)
5586 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5587 op1 = fold_convert (TREE_TYPE (vectype), op1);
5588 if (dt[1] != vect_constant_def)
5589 op1 = vect_init_vector (vinfo, stmt_info, op1,
5590 TREE_TYPE (vectype), NULL);
5593 /* Handle def. */
5594 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5596 if (scalar_shift_arg && dt[1] != vect_internal_def)
5598 /* Vector shl and shr insn patterns can be defined with scalar
5599 operand 2 (shift operand). In this case, use constant or loop
5600 invariant op1 directly, without extending it to vector mode
5601 first. */
5602 optab_op2_mode = insn_data[icode].operand[2].mode;
5603 if (!VECTOR_MODE_P (optab_op2_mode))
5605 if (dump_enabled_p ())
5606 dump_printf_loc (MSG_NOTE, vect_location,
5607 "operand 1 using scalar mode.\n");
5608 vec_oprnd1 = op1;
5609 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5610 vec_oprnds1.quick_push (vec_oprnd1);
5611 /* Store vec_oprnd1 for every vector stmt to be created.
5612 We check during the analysis that all the shift arguments
5613 are the same.
5614 TODO: Allow different constants for different vector
5615 stmts generated for an SLP instance. */
5616 for (k = 0;
5617 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5618 vec_oprnds1.quick_push (vec_oprnd1);
5621 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5623 if (was_scalar_shift_arg)
5625 /* If the argument was the same in all lanes, create
5626 the correctly typed vector shift amount directly. */
5627 op1 = fold_convert (TREE_TYPE (vectype), op1);
5628 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5629 !loop_vinfo ? gsi : NULL);
5630 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5631 !loop_vinfo ? gsi : NULL);
5632 vec_oprnds1.create (slp_node->vec_stmts_size);
5633 for (k = 0; k < slp_node->vec_stmts_size; k++)
5634 vec_oprnds1.quick_push (vec_oprnd1);
5636 else if (dt[1] == vect_constant_def)
5637 /* The constant shift amount has been adjusted in place. */
5639 else
5640 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5643 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5644 (a special case for certain kinds of vector shifts); otherwise,
5645 operand 1 should be of a vector type (the usual case). */
5646 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5647 op0, &vec_oprnds0,
5648 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5650 /* Arguments are ready. Create the new vector stmt. */
5651 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5653 /* For internal defs where we need to use a scalar shift arg
5654 extract the first lane. */
5655 if (scalar_shift_arg && dt[1] == vect_internal_def)
5657 vop1 = vec_oprnds1[0];
5658 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5659 gassign *new_stmt
5660 = gimple_build_assign (new_temp,
5661 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5662 vop1,
5663 TYPE_SIZE (TREE_TYPE (new_temp)),
5664 bitsize_zero_node));
5665 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5666 vop1 = new_temp;
5668 else
5669 vop1 = vec_oprnds1[i];
5670 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5671 new_temp = make_ssa_name (vec_dest, new_stmt);
5672 gimple_assign_set_lhs (new_stmt, new_temp);
5673 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5674 if (slp_node)
5675 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5676 else
5677 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5680 if (!slp_node)
5681 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5683 vec_oprnds0.release ();
5684 vec_oprnds1.release ();
5686 return true;
5690 /* Function vectorizable_operation.
5692 Check if STMT_INFO performs a binary, unary or ternary operation that can
5693 be vectorized.
5694 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5695 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5696 Return true if STMT_INFO is vectorizable in this way. */
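/* Typical candidates are simple arithmetic or bitwise statements such as

     z = x + y;
     w = -x;
     m = x & y;

   Loads/stores, shifts, comparisons and COND_EXPRs are rejected below
   because they are handled by their own vectorizable_* routines.  */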
5698 static bool
5699 vectorizable_operation (vec_info *vinfo,
5700 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5701 gimple **vec_stmt, slp_tree slp_node,
5702 stmt_vector_for_cost *cost_vec)
5704 tree vec_dest;
5705 tree scalar_dest;
5706 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5707 tree vectype;
5708 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5709 enum tree_code code, orig_code;
5710 machine_mode vec_mode;
5711 tree new_temp;
5712 int op_type;
5713 optab optab;
5714 bool target_support_p;
5715 enum vect_def_type dt[3]
5716 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5717 int ndts = 3;
5718 poly_uint64 nunits_in;
5719 poly_uint64 nunits_out;
5720 tree vectype_out;
5721 int ncopies, vec_num;
5722 int i;
5723 vec<tree> vec_oprnds0 = vNULL;
5724 vec<tree> vec_oprnds1 = vNULL;
5725 vec<tree> vec_oprnds2 = vNULL;
5726 tree vop0, vop1, vop2;
5727 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5729 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5730 return false;
5732 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5733 && ! vec_stmt)
5734 return false;
5736 /* Is STMT a vectorizable binary/unary operation? */
5737 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5738 if (!stmt)
5739 return false;
5741 /* Loads and stores are handled in vectorizable_{load,store}. */
5742 if (STMT_VINFO_DATA_REF (stmt_info))
5743 return false;
5745 orig_code = code = gimple_assign_rhs_code (stmt);
5747 /* Shifts are handled in vectorizable_shift. */
5748 if (code == LSHIFT_EXPR
5749 || code == RSHIFT_EXPR
5750 || code == LROTATE_EXPR
5751 || code == RROTATE_EXPR)
5752 return false;
5754 /* Comparisons are handled in vectorizable_comparison. */
5755 if (TREE_CODE_CLASS (code) == tcc_comparison)
5756 return false;
5758 /* Conditions are handled in vectorizable_condition. */
5759 if (code == COND_EXPR)
5760 return false;
5762 /* For pointer addition and subtraction, we should use the normal
5763 plus and minus for the vector operation. */
5764 if (code == POINTER_PLUS_EXPR)
5765 code = PLUS_EXPR;
5766 if (code == POINTER_DIFF_EXPR)
5767 code = MINUS_EXPR;
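/* E.g. p_2 = p_1 + 4 becomes a vector PLUS_EXPR on pointer-sized
   unsigned elements, and p_1 - q_1 becomes a MINUS_EXPR whose signed
   result is later fixed up via the VIEW_CONVERT_EXPR emitted in the
   transform phase below.  */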
5769 /* Support only unary or binary operations. */
5770 op_type = TREE_CODE_LENGTH (code);
5771 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5773 if (dump_enabled_p ())
5774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5775 "num. args = %d (not unary/binary/ternary op).\n",
5776 op_type);
5777 return false;
5780 scalar_dest = gimple_assign_lhs (stmt);
5781 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5783 /* Most operations cannot handle bit-precision types without extra
5784 truncations. */
5785 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5786 if (!mask_op_p
5787 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5788 /* Exception are bitwise binary operations. */
5789 && code != BIT_IOR_EXPR
5790 && code != BIT_XOR_EXPR
5791 && code != BIT_AND_EXPR)
5793 if (dump_enabled_p ())
5794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5795 "bit-precision arithmetic not supported.\n");
5796 return false;
5799 slp_tree slp_op0;
5800 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5801 0, &op0, &slp_op0, &dt[0], &vectype))
5803 if (dump_enabled_p ())
5804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5805 "use not simple.\n");
5806 return false;
5808 /* If op0 is an external or constant def, infer the vector type
5809 from the scalar type. */
5810 if (!vectype)
5812 /* For a boolean type we cannot determine the vectype from an
5813 invariant value (we don't know whether it is a vector
5814 of booleans or a vector of integers). We use the output
5815 vectype because operations on booleans don't change the
5816 type. */
5817 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5819 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5821 if (dump_enabled_p ())
5822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5823 "not supported operation on bool value.\n");
5824 return false;
5826 vectype = vectype_out;
5828 else
5829 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5830 slp_node);
5832 if (vec_stmt)
5833 gcc_assert (vectype);
5834 if (!vectype)
5836 if (dump_enabled_p ())
5837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5838 "no vectype for scalar type %T\n",
5839 TREE_TYPE (op0));
5841 return false;
5844 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5845 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5846 if (maybe_ne (nunits_out, nunits_in))
5847 return false;
5849 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5850 slp_tree slp_op1 = NULL, slp_op2 = NULL;
5851 if (op_type == binary_op || op_type == ternary_op)
5853 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5854 1, &op1, &slp_op1, &dt[1], &vectype2))
5856 if (dump_enabled_p ())
5857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5858 "use not simple.\n");
5859 return false;
5862 if (op_type == ternary_op)
5864 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5865 2, &op2, &slp_op2, &dt[2], &vectype3))
5867 if (dump_enabled_p ())
5868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5869 "use not simple.\n");
5870 return false;
5874 /* Multiple types in SLP are handled by creating the appropriate number of
5875 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5876 case of SLP. */
5877 if (slp_node)
5879 ncopies = 1;
5880 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5882 else
5884 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5885 vec_num = 1;
5888 gcc_assert (ncopies >= 1);
5890 /* Reject attempts to combine mask types with nonmask types, e.g. if
5891 we have an AND between a (nonmask) boolean loaded from memory and
5892 a (mask) boolean result of a comparison.
5894 TODO: We could easily fix these cases up using pattern statements. */
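/* E.g. _1 = d_3 != 0; _2 = b_4 & _1; where b_4 is a bool loaded from
   memory: b_4 has a nonmask (data) vector type while _1 has a mask
   type, so the AND cannot be vectorized directly.  */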
5895 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
5896 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
5897 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
5899 if (dump_enabled_p ())
5900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5901 "mixed mask and nonmask vector types\n");
5902 return false;
5905 /* Supportable by target? */
5907 vec_mode = TYPE_MODE (vectype);
5908 if (code == MULT_HIGHPART_EXPR)
5909 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5910 else
5912 optab = optab_for_tree_code (code, vectype, optab_default);
5913 if (!optab)
5915 if (dump_enabled_p ())
5916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5917 "no optab.\n");
5918 return false;
5920 target_support_p = (optab_handler (optab, vec_mode)
5921 != CODE_FOR_nothing);
5924 if (!target_support_p)
5926 if (dump_enabled_p ())
5927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5928 "op not supported by target.\n");
5929 /* Check only during analysis. */
5930 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5931 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5932 return false;
5933 if (dump_enabled_p ())
5934 dump_printf_loc (MSG_NOTE, vect_location,
5935 "proceeding using word mode.\n");
5938 /* Worthwhile without SIMD support? Check only during analysis. */
5939 if (!VECTOR_MODE_P (vec_mode)
5940 && !vec_stmt
5941 && !vect_worthwhile_without_simd_p (vinfo, code))
5943 if (dump_enabled_p ())
5944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5945 "not worthwhile without SIMD support.\n");
5946 return false;
5949 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
5950 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
5951 internal_fn cond_fn = get_conditional_internal_fn (code);
5953 if (!vec_stmt) /* transformation not required. */
5955 /* If this operation is part of a reduction, a fully-masked loop
5956 should only change the active lanes of the reduction chain,
5957 keeping the inactive lanes as-is. */
5958 if (loop_vinfo
5959 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5960 && reduc_idx >= 0)
5962 if (cond_fn == IFN_LAST
5963 || !direct_internal_fn_supported_p (cond_fn, vectype,
5964 OPTIMIZE_FOR_SPEED))
5966 if (dump_enabled_p ())
5967 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5968 "can't use a fully-masked loop because no"
5969 " conditional operation is available.\n");
5970 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
5972 else
5973 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
5974 vectype, NULL);
5977 /* Put types on constant and invariant SLP children. */
5978 if (slp_node
5979 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5980 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
5981 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
5983 if (dump_enabled_p ())
5984 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5985 "incompatible vector types for invariants\n");
5986 return false;
5989 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5990 DUMP_VECT_SCOPE ("vectorizable_operation");
5991 vect_model_simple_cost (vinfo, stmt_info,
5992 ncopies, dt, ndts, slp_node, cost_vec);
5993 return true;
5996 /* Transform. */
5998 if (dump_enabled_p ())
5999 dump_printf_loc (MSG_NOTE, vect_location,
6000 "transform binary/unary operation.\n");
6002 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6004 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6005 vectors with unsigned elements, but the result is signed. So, we
6006 need to compute the MINUS_EXPR into vectype temporary and
6007 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6008 tree vec_cvt_dest = NULL_TREE;
6009 if (orig_code == POINTER_DIFF_EXPR)
6011 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6012 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6014 /* Handle def. */
6015 else
6016 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6018 /* In case the vectorization factor (VF) is bigger than the number
6019 of elements that we can fit in a vectype (nunits), we have to generate
6020 more than one vector stmt, i.e. we need to "unroll" the
6021 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6022 from one copy of the vector stmt to the next, in the field
6023 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6024 stages to find the correct vector defs to be used when vectorizing
6025 stmts that use the defs of the current stmt. The example below
6026 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6027 we need to create 4 vectorized stmts):
6029 before vectorization:
6030 RELATED_STMT VEC_STMT
6031 S1: x = memref - -
6032 S2: z = x + 1 - -
6034 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6035 there):
6036 RELATED_STMT VEC_STMT
6037 VS1_0: vx0 = memref0 VS1_1 -
6038 VS1_1: vx1 = memref1 VS1_2 -
6039 VS1_2: vx2 = memref2 VS1_3 -
6040 VS1_3: vx3 = memref3 - -
6041 S1: x = load - VS1_0
6042 S2: z = x + 1 - -
6044 step 2: vectorize stmt S2 (done here):
6045 To vectorize stmt S2 we first need to find the relevant vector
6046 def for the first operand 'x'. This is, as usual, obtained from
6047 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6048 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6049 relevant vector def 'vx0'. Having found 'vx0' we can generate
6050 the vector stmt VS2_0, and as usual, record it in the
6051 STMT_VINFO_VEC_STMT of stmt S2.
6052 When creating the second copy (VS2_1), we obtain the relevant vector
6053 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6054 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6055 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6056 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6057 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6058 chain of stmts and pointers:
6059 RELATED_STMT VEC_STMT
6060 VS1_0: vx0 = memref0 VS1_1 -
6061 VS1_1: vx1 = memref1 VS1_2 -
6062 VS1_2: vx2 = memref2 VS1_3 -
6063 VS1_3: vx3 = memref3 - -
6064 S1: x = load - VS1_0
6065 VS2_0: vz0 = vx0 + v1 VS2_1 -
6066 VS2_1: vz1 = vx1 + v1 VS2_2 -
6067 VS2_2: vz2 = vx2 + v1 VS2_3 -
6068 VS2_3: vz3 = vx3 + v1 - -
6069 S2: z = x + 1 - VS2_0 */
6071 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6072 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6073 /* Arguments are ready. Create the new vector stmt. */
6074 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6076 gimple *new_stmt = NULL;
6077 vop1 = ((op_type == binary_op || op_type == ternary_op)
6078 ? vec_oprnds1[i] : NULL_TREE);
6079 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6080 if (masked_loop_p && reduc_idx >= 0)
6082 /* Perform the operation on active elements only and take
6083 inactive elements from the reduction chain input. */
6084 gcc_assert (!vop2);
6085 vop2 = reduc_idx == 1 ? vop1 : vop0;
6086 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6087 vectype, i);
6088 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6089 vop0, vop1, vop2);
6090 new_temp = make_ssa_name (vec_dest, call);
6091 gimple_call_set_lhs (call, new_temp);
6092 gimple_call_set_nothrow (call, true);
6093 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6094 new_stmt = call;
6096 else
6098 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6099 new_temp = make_ssa_name (vec_dest, new_stmt);
6100 gimple_assign_set_lhs (new_stmt, new_temp);
6101 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6102 if (vec_cvt_dest)
6104 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6105 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6106 new_temp);
6107 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6108 gimple_assign_set_lhs (new_stmt, new_temp);
6109 vect_finish_stmt_generation (vinfo, stmt_info,
6110 new_stmt, gsi);
6113 if (slp_node)
6114 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6115 else
6116 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6119 if (!slp_node)
6120 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6122 vec_oprnds0.release ();
6123 vec_oprnds1.release ();
6124 vec_oprnds2.release ();
6126 return true;
6129 /* A helper function to ensure data reference DR_INFO's base alignment. */
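/* For example, if DR_TARGET_ALIGNMENT asks for 16-byte alignment of
   the accesses but the base object was only declared with 4-byte
   alignment, bump DECL_ALIGN (or the symtab alignment for decls in
   the symbol table) so that aligned vector accesses can be used.  */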
6131 static void
6132 ensure_base_align (dr_vec_info *dr_info)
6134 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6135 return;
6137 if (dr_info->base_misaligned)
6139 tree base_decl = dr_info->base_decl;
6141 // We should only be able to increase the alignment of a base object if
6142 // we know what its new alignment should be at compile time.
6143 unsigned HOST_WIDE_INT align_base_to =
6144 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6146 if (decl_in_symtab_p (base_decl))
6147 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6148 else if (DECL_ALIGN (base_decl) < align_base_to)
6150 SET_DECL_ALIGN (base_decl, align_base_to);
6151 DECL_USER_ALIGN (base_decl) = 1;
6153 dr_info->base_misaligned = false;
6158 /* Function get_group_alias_ptr_type.
6160 Return the alias type for the group starting at FIRST_STMT_INFO. */
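/* If the group members do not all agree on an alias set, the function
   falls back to ptr_type_node, i.e. an alias-everything pointer type,
   so that the generated vector accesses remain conservatively correct.  */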
6162 static tree
6163 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6165 struct data_reference *first_dr, *next_dr;
6167 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6168 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6169 while (next_stmt_info)
6171 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6172 if (get_alias_set (DR_REF (first_dr))
6173 != get_alias_set (DR_REF (next_dr)))
6175 if (dump_enabled_p ())
6176 dump_printf_loc (MSG_NOTE, vect_location,
6177 "conflicting alias set types.\n");
6178 return ptr_type_node;
6180 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6182 return reference_alias_ptr_type (DR_REF (first_dr));
6186 /* Function scan_operand_equal_p.
6188 Helper function for check_scan_store. Compare two references
6189 with .GOMP_SIMD_LANE bases. */
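/* Both references are expected to be "omp simd array" accesses such as
   D.2042[_25] and D.2043[_25] in the check_scan_store comments below;
   the comparison strips MEM_REF/POINTER_PLUS_EXPR wrappers, scaling by
   a constant step and widening conversions of the index, so that two
   accesses to the same lane compare equal.  */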
6191 static bool
6192 scan_operand_equal_p (tree ref1, tree ref2)
6194 tree ref[2] = { ref1, ref2 };
6195 poly_int64 bitsize[2], bitpos[2];
6196 tree offset[2], base[2];
6197 for (int i = 0; i < 2; ++i)
6199 machine_mode mode;
6200 int unsignedp, reversep, volatilep = 0;
6201 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6202 &offset[i], &mode, &unsignedp,
6203 &reversep, &volatilep);
6204 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6205 return false;
6206 if (TREE_CODE (base[i]) == MEM_REF
6207 && offset[i] == NULL_TREE
6208 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6210 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6211 if (is_gimple_assign (def_stmt)
6212 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6213 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6214 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6216 if (maybe_ne (mem_ref_offset (base[i]), 0))
6217 return false;
6218 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6219 offset[i] = gimple_assign_rhs2 (def_stmt);
6224 if (!operand_equal_p (base[0], base[1], 0))
6225 return false;
6226 if (maybe_ne (bitsize[0], bitsize[1]))
6227 return false;
6228 if (offset[0] != offset[1])
6230 if (!offset[0] || !offset[1])
6231 return false;
6232 if (!operand_equal_p (offset[0], offset[1], 0))
6234 tree step[2];
6235 for (int i = 0; i < 2; ++i)
6237 step[i] = integer_one_node;
6238 if (TREE_CODE (offset[i]) == SSA_NAME)
6240 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6241 if (is_gimple_assign (def_stmt)
6242 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6243 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6244 == INTEGER_CST))
6246 step[i] = gimple_assign_rhs2 (def_stmt);
6247 offset[i] = gimple_assign_rhs1 (def_stmt);
6250 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6252 step[i] = TREE_OPERAND (offset[i], 1);
6253 offset[i] = TREE_OPERAND (offset[i], 0);
6255 tree rhs1 = NULL_TREE;
6256 if (TREE_CODE (offset[i]) == SSA_NAME)
6258 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6259 if (gimple_assign_cast_p (def_stmt))
6260 rhs1 = gimple_assign_rhs1 (def_stmt);
6262 else if (CONVERT_EXPR_P (offset[i]))
6263 rhs1 = TREE_OPERAND (offset[i], 0);
6264 if (rhs1
6265 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6266 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6267 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6268 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6269 offset[i] = rhs1;
6271 if (!operand_equal_p (offset[0], offset[1], 0)
6272 || !operand_equal_p (step[0], step[1], 0))
6273 return false;
6276 return true;
6280 enum scan_store_kind {
6281 /* Normal permutation. */
6282 scan_store_kind_perm,
6284 /* Whole vector left shift permutation with zero init. */
6285 scan_store_kind_lshift_zero,
6287 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6288 scan_store_kind_lshift_cond
6291 /* Function scan_store_can_perm_p.
6293 Verify if we can perform the needed permutations or whole vector shifts.
6294 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6295 USE_WHOLE_VECTOR, if nonnull, records which scan_store_kind
6296 operation to do at each step. */
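/* For example, with nunits == 8 the loop below checks the permutations
   { 0, 8, 9, 10, 11, 12, 13, 14 }, { 0, 1, 8, 9, 10, 11, 12, 13 },
   { 0, 1, 2, 3, 8, 9, 10, 11 } and finally { 7, 7, 7, 7, 7, 7, 7, 7 },
   i.e. the masks used in the scan expansion shown in the
   check_scan_store comment further below.  */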
6298 static int
6299 scan_store_can_perm_p (tree vectype, tree init,
6300 vec<enum scan_store_kind> *use_whole_vector = NULL)
6302 enum machine_mode vec_mode = TYPE_MODE (vectype);
6303 unsigned HOST_WIDE_INT nunits;
6304 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6305 return -1;
6306 int units_log2 = exact_log2 (nunits);
6307 if (units_log2 <= 0)
6308 return -1;
6310 int i;
6311 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6312 for (i = 0; i <= units_log2; ++i)
6314 unsigned HOST_WIDE_INT j, k;
6315 enum scan_store_kind kind = scan_store_kind_perm;
6316 vec_perm_builder sel (nunits, nunits, 1);
6317 sel.quick_grow (nunits);
6318 if (i == units_log2)
6320 for (j = 0; j < nunits; ++j)
6321 sel[j] = nunits - 1;
6323 else
6325 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6326 sel[j] = j;
6327 for (k = 0; j < nunits; ++j, ++k)
6328 sel[j] = nunits + k;
6330 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6331 if (!can_vec_perm_const_p (vec_mode, indices))
6333 if (i == units_log2)
6334 return -1;
6336 if (whole_vector_shift_kind == scan_store_kind_perm)
6338 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6339 return -1;
6340 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6341 /* Whole vector shifts shift in zeros, so if init is an all-zero
6342 constant, there is no need to do anything further. */
6343 if ((TREE_CODE (init) != INTEGER_CST
6344 && TREE_CODE (init) != REAL_CST)
6345 || !initializer_zerop (init))
6347 tree masktype = truth_type_for (vectype);
6348 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6349 return -1;
6350 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6353 kind = whole_vector_shift_kind;
6355 if (use_whole_vector)
6357 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6358 use_whole_vector->safe_grow_cleared (i, true);
6359 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6360 use_whole_vector->safe_push (kind);
6364 return units_log2;
6368 /* Function check_scan_store.
6370 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6372 static bool
6373 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6374 enum vect_def_type rhs_dt, bool slp, tree mask,
6375 vect_memory_access_type memory_access_type)
6377 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6378 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6379 tree ref_type;
6381 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6382 if (slp
6383 || mask
6384 || memory_access_type != VMAT_CONTIGUOUS
6385 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6386 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6387 || loop_vinfo == NULL
6388 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6389 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6390 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6391 || !integer_zerop (DR_INIT (dr_info->dr))
6392 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6393 || !alias_sets_conflict_p (get_alias_set (vectype),
6394 get_alias_set (TREE_TYPE (ref_type))))
6396 if (dump_enabled_p ())
6397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6398 "unsupported OpenMP scan store.\n");
6399 return false;
6402 /* We need to pattern match code built by OpenMP lowering and simplified
6403 by subsequent optimizations into something we can handle.
6404 #pragma omp simd reduction(inscan,+:r)
6405 for (...)
6407 r += something ();
6408 #pragma omp scan inclusive (r)
6409 use (r);
6411 shall have body with:
6412 // Initialization for input phase, store the reduction initializer:
6413 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6414 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6415 D.2042[_21] = 0;
6416 // Actual input phase:
6418 r.0_5 = D.2042[_20];
6419 _6 = _4 + r.0_5;
6420 D.2042[_20] = _6;
6421 // Initialization for scan phase:
6422 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6423 _26 = D.2043[_25];
6424 _27 = D.2042[_25];
6425 _28 = _26 + _27;
6426 D.2043[_25] = _28;
6427 D.2042[_25] = _28;
6428 // Actual scan phase:
6430 r.1_8 = D.2042[_20];
6432 The "omp simd array" variable D.2042 holds the privatized copy used
6433 inside of the loop and D.2043 is another one that holds copies of
6434 the current original list item. The separate GOMP_SIMD_LANE ifn
6435 kinds are there in order to allow optimizing the initializer store
6436 and combiner sequence (e.g. if it is originally some C++ish
6437 user-defined reduction), while still allowing the vectorizer to
6438 pattern recognize it and turn it into the appropriate vectorized scan.
6440 For exclusive scan, this is slightly different:
6441 #pragma omp simd reduction(inscan,+:r)
6442 for (...)
6444 use (r);
6445 #pragma omp scan exclusive (r)
6446 r += something ();
6448 shall have body with:
6449 // Initialization for input phase, store the reduction initializer:
6450 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6451 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6452 D.2042[_21] = 0;
6453 // Actual input phase:
6455 r.0_5 = D.2042[_20];
6456 _6 = _4 + r.0_5;
6457 D.2042[_20] = _6;
6458 // Initialization for scan phase:
6459 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6460 _26 = D.2043[_25];
6461 D.2044[_25] = _26;
6462 _27 = D.2042[_25];
6463 _28 = _26 + _27;
6464 D.2043[_25] = _28;
6465 // Actual scan phase:
6467 r.1_8 = D.2044[_20];
6468 ... */
6470 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6472 /* Match the D.2042[_21] = 0; store above. Just require that
6473 it is a constant or external definition store. */
6474 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6476 fail_init:
6477 if (dump_enabled_p ())
6478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6479 "unsupported OpenMP scan initializer store.\n");
6480 return false;
6483 if (! loop_vinfo->scan_map)
6484 loop_vinfo->scan_map = new hash_map<tree, tree>;
6485 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6486 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6487 if (cached)
6488 goto fail_init;
6489 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6491 /* These stores can be vectorized normally. */
6492 return true;
6495 if (rhs_dt != vect_internal_def)
6497 fail:
6498 if (dump_enabled_p ())
6499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6500 "unsupported OpenMP scan combiner pattern.\n");
6501 return false;
6504 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6505 tree rhs = gimple_assign_rhs1 (stmt);
6506 if (TREE_CODE (rhs) != SSA_NAME)
6507 goto fail;
6509 gimple *other_store_stmt = NULL;
6510 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6511 bool inscan_var_store
6512 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6514 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6516 if (!inscan_var_store)
6518 use_operand_p use_p;
6519 imm_use_iterator iter;
6520 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6522 gimple *use_stmt = USE_STMT (use_p);
6523 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6524 continue;
6525 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6526 || !is_gimple_assign (use_stmt)
6527 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6528 || other_store_stmt
6529 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6530 goto fail;
6531 other_store_stmt = use_stmt;
6533 if (other_store_stmt == NULL)
6534 goto fail;
6535 rhs = gimple_assign_lhs (other_store_stmt);
6536 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6537 goto fail;
6540 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6542 use_operand_p use_p;
6543 imm_use_iterator iter;
6544 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6546 gimple *use_stmt = USE_STMT (use_p);
6547 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6548 continue;
6549 if (other_store_stmt)
6550 goto fail;
6551 other_store_stmt = use_stmt;
6554 else
6555 goto fail;
6557 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6558 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6559 || !is_gimple_assign (def_stmt)
6560 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6561 goto fail;
6563 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6564 /* For pointer addition, we should use the normal plus for the vector
6565 operation. */
6566 switch (code)
6568 case POINTER_PLUS_EXPR:
6569 code = PLUS_EXPR;
6570 break;
6571 case MULT_HIGHPART_EXPR:
6572 goto fail;
6573 default:
6574 break;
6576 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6577 goto fail;
6579 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6580 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6581 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6582 goto fail;
6584 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6585 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6586 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6587 || !gimple_assign_load_p (load1_stmt)
6588 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6589 || !gimple_assign_load_p (load2_stmt))
6590 goto fail;
6592 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6593 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6594 if (load1_stmt_info == NULL
6595 || load2_stmt_info == NULL
6596 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6597 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6598 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6599 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6600 goto fail;
6602 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6604 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6605 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6606 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6607 goto fail;
6608 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6609 tree lrhs;
6610 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6611 lrhs = rhs1;
6612 else
6613 lrhs = rhs2;
6614 use_operand_p use_p;
6615 imm_use_iterator iter;
6616 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6618 gimple *use_stmt = USE_STMT (use_p);
6619 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6620 continue;
6621 if (other_store_stmt)
6622 goto fail;
6623 other_store_stmt = use_stmt;
6627 if (other_store_stmt == NULL)
6628 goto fail;
6629 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6630 || !gimple_store_p (other_store_stmt))
6631 goto fail;
6633 stmt_vec_info other_store_stmt_info
6634 = loop_vinfo->lookup_stmt (other_store_stmt);
6635 if (other_store_stmt_info == NULL
6636 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6637 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6638 goto fail;
6640 gimple *stmt1 = stmt;
6641 gimple *stmt2 = other_store_stmt;
6642 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6643 std::swap (stmt1, stmt2);
6644 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6645 gimple_assign_rhs1 (load2_stmt)))
6647 std::swap (rhs1, rhs2);
6648 std::swap (load1_stmt, load2_stmt);
6649 std::swap (load1_stmt_info, load2_stmt_info);
6651 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6652 gimple_assign_rhs1 (load1_stmt)))
6653 goto fail;
6655 tree var3 = NULL_TREE;
6656 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6657 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6658 gimple_assign_rhs1 (load2_stmt)))
6659 goto fail;
6660 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6662 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6663 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6664 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6665 goto fail;
6666 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6667 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6668 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6669 || lookup_attribute ("omp simd inscan exclusive",
6670 DECL_ATTRIBUTES (var3)))
6671 goto fail;
6674 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6675 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6676 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6677 goto fail;
6679 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6680 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6681 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6682 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6683 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6684 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6685 goto fail;
6687 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6688 std::swap (var1, var2);
6690 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6692 if (!lookup_attribute ("omp simd inscan exclusive",
6693 DECL_ATTRIBUTES (var1)))
6694 goto fail;
6695 var1 = var3;
6698 if (loop_vinfo->scan_map == NULL)
6699 goto fail;
6700 tree *init = loop_vinfo->scan_map->get (var1);
6701 if (init == NULL)
6702 goto fail;
6704 /* The IL is as expected; now check whether we can actually vectorize it.
6705 Inclusive scan:
6706 _26 = D.2043[_25];
6707 _27 = D.2042[_25];
6708 _28 = _26 + _27;
6709 D.2043[_25] = _28;
6710 D.2042[_25] = _28;
6711 should be vectorized as (where _40 is the vectorized rhs
6712 from the D.2042[_21] = 0; store):
6713 _30 = MEM <vector(8) int> [(int *)&D.2043];
6714 _31 = MEM <vector(8) int> [(int *)&D.2042];
6715 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6716 _33 = _31 + _32;
6717 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6718 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6719 _35 = _33 + _34;
6720 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6721 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6722 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6723 _37 = _35 + _36;
6724 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6725 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6726 _38 = _30 + _37;
6727 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6728 MEM <vector(8) int> [(int *)&D.2043] = _39;
6729 MEM <vector(8) int> [(int *)&D.2042] = _38;
6730 Exclusive scan:
6731 _26 = D.2043[_25];
6732 D.2044[_25] = _26;
6733 _27 = D.2042[_25];
6734 _28 = _26 + _27;
6735 D.2043[_25] = _28;
6736 should be vectorized as (where _40 is the vectorized rhs
6737 from the D.2042[_21] = 0; store):
6738 _30 = MEM <vector(8) int> [(int *)&D.2043];
6739 _31 = MEM <vector(8) int> [(int *)&D.2042];
6740 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6741 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6742 _34 = _32 + _33;
6743 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6744 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6745 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6746 _36 = _34 + _35;
6747 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6748 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6749 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6750 _38 = _36 + _37;
6751 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6752 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6753 _39 = _30 + _38;
6754 _50 = _31 + _39;
6755 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6756 MEM <vector(8) int> [(int *)&D.2044] = _39;
6757 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6758 enum machine_mode vec_mode = TYPE_MODE (vectype);
6759 optab optab = optab_for_tree_code (code, vectype, optab_default);
6760 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6761 goto fail;
6763 int units_log2 = scan_store_can_perm_p (vectype, *init);
6764 if (units_log2 == -1)
6765 goto fail;
6767 return true;
6771 /* Function vectorizable_scan_store.
6773 Helper of vectorizable_store; the arguments are as for vectorizable_store.
6774 Handle only the transformation; the checking is done in check_scan_store. */
6776 static bool
6777 vectorizable_scan_store (vec_info *vinfo,
6778 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6779 gimple **vec_stmt, int ncopies)
6781 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6782 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6783 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6784 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6786 if (dump_enabled_p ())
6787 dump_printf_loc (MSG_NOTE, vect_location,
6788 "transform scan store. ncopies = %d\n", ncopies);
6790 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6791 tree rhs = gimple_assign_rhs1 (stmt);
6792 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6794 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6795 bool inscan_var_store
6796 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6798 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6800 use_operand_p use_p;
6801 imm_use_iterator iter;
6802 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6804 gimple *use_stmt = USE_STMT (use_p);
6805 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6806 continue;
6807 rhs = gimple_assign_lhs (use_stmt);
6808 break;
6812 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6813 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6814 if (code == POINTER_PLUS_EXPR)
6815 code = PLUS_EXPR;
6816 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6817 && commutative_tree_code (code));
6818 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6819 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6820 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6821 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6822 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6823 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6824 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6825 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6826 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6827 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6828 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6830 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6832 std::swap (rhs1, rhs2);
6833 std::swap (var1, var2);
6834 std::swap (load1_dr_info, load2_dr_info);
6837 tree *init = loop_vinfo->scan_map->get (var1);
6838 gcc_assert (init);
6840 unsigned HOST_WIDE_INT nunits;
6841 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6842 gcc_unreachable ();
6843 auto_vec<enum scan_store_kind, 16> use_whole_vector;
6844 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6845 gcc_assert (units_log2 > 0);
6846 auto_vec<tree, 16> perms;
6847 perms.quick_grow (units_log2 + 1);
6848 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6849 for (int i = 0; i <= units_log2; ++i)
6851 unsigned HOST_WIDE_INT j, k;
6852 vec_perm_builder sel (nunits, nunits, 1);
6853 sel.quick_grow (nunits);
6854 if (i == units_log2)
6855 for (j = 0; j < nunits; ++j)
6856 sel[j] = nunits - 1;
6857 else
6859 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6860 sel[j] = j;
6861 for (k = 0; j < nunits; ++j, ++k)
6862 sel[j] = nunits + k;
6864 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6865 if (!use_whole_vector.is_empty ()
6866 && use_whole_vector[i] != scan_store_kind_perm)
6868 if (zero_vec == NULL_TREE)
6869 zero_vec = build_zero_cst (vectype);
6870 if (masktype == NULL_TREE
6871 && use_whole_vector[i] == scan_store_kind_lshift_cond)
6872 masktype = truth_type_for (vectype);
6873 perms[i] = vect_gen_perm_mask_any (vectype, indices);
6875 else
6876 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
6879 tree vec_oprnd1 = NULL_TREE;
6880 tree vec_oprnd2 = NULL_TREE;
6881 tree vec_oprnd3 = NULL_TREE;
6882 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
6883 tree dataref_offset = build_int_cst (ref_type, 0);
6884 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
6885 vectype, VMAT_CONTIGUOUS);
6886 tree ldataref_ptr = NULL_TREE;
6887 tree orig = NULL_TREE;
6888 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6889 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
6890 auto_vec<tree> vec_oprnds1;
6891 auto_vec<tree> vec_oprnds2;
6892 auto_vec<tree> vec_oprnds3;
6893 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
6894 *init, &vec_oprnds1,
6895 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
6896 rhs2, &vec_oprnds3);
6897 for (int j = 0; j < ncopies; j++)
6899 vec_oprnd1 = vec_oprnds1[j];
6900 if (ldataref_ptr == NULL)
6901 vec_oprnd2 = vec_oprnds2[j];
6902 vec_oprnd3 = vec_oprnds3[j];
6903 if (j == 0)
6904 orig = vec_oprnd3;
6905 else if (!inscan_var_store)
6906 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6908 if (ldataref_ptr)
6910 vec_oprnd2 = make_ssa_name (vectype);
6911 tree data_ref = fold_build2 (MEM_REF, vectype,
6912 unshare_expr (ldataref_ptr),
6913 dataref_offset);
6914 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
6915 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
6916 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6917 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6918 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6921 tree v = vec_oprnd2;
6922 for (int i = 0; i < units_log2; ++i)
6924 tree new_temp = make_ssa_name (vectype);
6925 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
6926 (zero_vec
6927 && (use_whole_vector[i]
6928 != scan_store_kind_perm))
6929 ? zero_vec : vec_oprnd1, v,
6930 perms[i]);
6931 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6932 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6933 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6935 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
6937 /* The whole-vector shift shifts in zeros, so if *init
6938 is not initializer_zerop, we need to replace those zeroed
6939 elements with elements from vec_oprnd1. */
6940 tree_vector_builder vb (masktype, nunits, 1);
6941 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
6942 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
6943 ? boolean_false_node : boolean_true_node);
6945 tree new_temp2 = make_ssa_name (vectype);
6946 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
6947 new_temp, vec_oprnd1);
6948 vect_finish_stmt_generation (vinfo, stmt_info,
6949 g, gsi);
6950 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6951 new_temp = new_temp2;
6954 /* For exclusive scan, perform the perms[i] permutation once
6955 more. */
6956 if (i == 0
6957 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
6958 && v == vec_oprnd2)
6960 v = new_temp;
6961 --i;
6962 continue;
6965 tree new_temp2 = make_ssa_name (vectype);
6966 g = gimple_build_assign (new_temp2, code, v, new_temp);
6967 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6968 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6970 v = new_temp2;
6973 tree new_temp = make_ssa_name (vectype);
6974 gimple *g = gimple_build_assign (new_temp, code, orig, v);
6975 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6976 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6978 tree last_perm_arg = new_temp;
6979 /* For exclusive scan, new_temp computed above is the exclusive scan
6980 prefix sum. Turn it into an inclusive prefix sum for the broadcast
6981 of the last element into orig. */
6982 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6984 last_perm_arg = make_ssa_name (vectype);
6985 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
6986 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6987 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6990 orig = make_ssa_name (vectype);
6991 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
6992 last_perm_arg, perms[units_log2]);
6993 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6994 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6996 if (!inscan_var_store)
6998 tree data_ref = fold_build2 (MEM_REF, vectype,
6999 unshare_expr (dataref_ptr),
7000 dataref_offset);
7001 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7002 g = gimple_build_assign (data_ref, new_temp);
7003 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7004 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7008 if (inscan_var_store)
7009 for (int j = 0; j < ncopies; j++)
7011 if (j != 0)
7012 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7014 tree data_ref = fold_build2 (MEM_REF, vectype,
7015 unshare_expr (dataref_ptr),
7016 dataref_offset);
7017 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7018 gimple *g = gimple_build_assign (data_ref, orig);
7019 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7020 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7022 return true;
7026 /* Function vectorizable_store.
7028 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7029 that can be vectorized.
7030 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7031 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7032 Return true if STMT_INFO is vectorizable in this way. */
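/* Handles plain stores such as a[i] = x_1 as well as masked and
   scatter stores expressed as internal function calls such as
   .MASK_STORE and .SCATTER_STORE.  */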
7034 static bool
7035 vectorizable_store (vec_info *vinfo,
7036 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7037 gimple **vec_stmt, slp_tree slp_node,
7038 stmt_vector_for_cost *cost_vec)
7040 tree data_ref;
7041 tree op;
7042 tree vec_oprnd = NULL_TREE;
7043 tree elem_type;
7044 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7045 class loop *loop = NULL;
7046 machine_mode vec_mode;
7047 tree dummy;
7048 enum vect_def_type rhs_dt = vect_unknown_def_type;
7049 enum vect_def_type mask_dt = vect_unknown_def_type;
7050 tree dataref_ptr = NULL_TREE;
7051 tree dataref_offset = NULL_TREE;
7052 gimple *ptr_incr = NULL;
7053 int ncopies;
7054 int j;
7055 stmt_vec_info first_stmt_info;
7056 bool grouped_store;
7057 unsigned int group_size, i;
7058 vec<tree> oprnds = vNULL;
7059 vec<tree> result_chain = vNULL;
7060 tree offset = NULL_TREE;
7061 vec<tree> vec_oprnds = vNULL;
7062 bool slp = (slp_node != NULL);
7063 unsigned int vec_num;
7064 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7065 tree aggr_type;
7066 gather_scatter_info gs_info;
7067 poly_uint64 vf;
7068 vec_load_store_type vls_type;
7069 tree ref_type;
7071 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7072 return false;
7074 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7075 && ! vec_stmt)
7076 return false;
7078 /* Is vectorizable store? */
7080 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7081 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7083 tree scalar_dest = gimple_assign_lhs (assign);
7084 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7085 && is_pattern_stmt_p (stmt_info))
7086 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7087 if (TREE_CODE (scalar_dest) != ARRAY_REF
7088 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7089 && TREE_CODE (scalar_dest) != INDIRECT_REF
7090 && TREE_CODE (scalar_dest) != COMPONENT_REF
7091 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7092 && TREE_CODE (scalar_dest) != REALPART_EXPR
7093 && TREE_CODE (scalar_dest) != MEM_REF)
7094 return false;
7096 else
7098 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7099 if (!call || !gimple_call_internal_p (call))
7100 return false;
7102 internal_fn ifn = gimple_call_internal_fn (call);
7103 if (!internal_store_fn_p (ifn))
7104 return false;
7106 if (slp_node != NULL)
7108 if (dump_enabled_p ())
7109 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7110 "SLP of masked stores not supported.\n");
7111 return false;
7114 int mask_index = internal_fn_mask_index (ifn);
7115 if (mask_index >= 0)
7117 mask = gimple_call_arg (call, mask_index);
7118 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7119 &mask_vectype))
7120 return false;
7124 op = vect_get_store_rhs (stmt_info);
7126 /* Cannot have hybrid store SLP -- that would mean storing to the
7127 same location twice. */
7128 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7130 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7131 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7133 if (loop_vinfo)
7135 loop = LOOP_VINFO_LOOP (loop_vinfo);
7136 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7138 else
7139 vf = 1;
7141 /* Multiple types in SLP are handled by creating the appropriate number of
7142 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7143 case of SLP. */
7144 if (slp)
7145 ncopies = 1;
7146 else
7147 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7149 gcc_assert (ncopies >= 1);
7151 /* FORNOW. This restriction should be relaxed. */
7152 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7154 if (dump_enabled_p ())
7155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7156 "multiple types in nested loop.\n");
7157 return false;
7160 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7161 op, &rhs_dt, &rhs_vectype, &vls_type))
7162 return false;
7164 elem_type = TREE_TYPE (vectype);
7165 vec_mode = TYPE_MODE (vectype);
7167 if (!STMT_VINFO_DATA_REF (stmt_info))
7168 return false;
7170 vect_memory_access_type memory_access_type;
7171 enum dr_alignment_support alignment_support_scheme;
7172 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7173 ncopies, &memory_access_type,
7174 &alignment_support_scheme, &gs_info))
7175 return false;
7177 if (mask)
7179 if (memory_access_type == VMAT_CONTIGUOUS)
7181 if (!VECTOR_MODE_P (vec_mode)
7182 || !can_vec_mask_load_store_p (vec_mode,
7183 TYPE_MODE (mask_vectype), false))
7184 return false;
7186 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7187 && (memory_access_type != VMAT_GATHER_SCATTER
7188 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7190 if (dump_enabled_p ())
7191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7192 "unsupported access type for masked store.\n");
7193 return false;
7196 else
7198 /* FORNOW. In some cases we can vectorize even if the data type is not
7199 supported (e.g. array initialization with 0). */
7200 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7201 return false;
7204 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7205 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7206 && memory_access_type != VMAT_GATHER_SCATTER
7207 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7208 if (grouped_store)
7210 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7211 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7212 group_size = DR_GROUP_SIZE (first_stmt_info);
7214 else
7216 first_stmt_info = stmt_info;
7217 first_dr_info = dr_info;
7218 group_size = vec_num = 1;
7221 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7223 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7224 memory_access_type))
7225 return false;
7228 if (!vec_stmt) /* transformation not required. */
7230 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7232 if (loop_vinfo
7233 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7234 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7235 group_size, memory_access_type,
7236 &gs_info, mask);
7238 if (slp_node
7239 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7240 vectype))
7242 if (dump_enabled_p ())
7243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7244 "incompatible vector types for invariants\n");
7245 return false;
7248 if (dump_enabled_p ()
7249 && memory_access_type != VMAT_ELEMENTWISE
7250 && memory_access_type != VMAT_GATHER_SCATTER
7251 && alignment_support_scheme != dr_aligned)
7252 dump_printf_loc (MSG_NOTE, vect_location,
7253 "Vectorizing an unaligned access.\n");
7255 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7256 vect_model_store_cost (vinfo, stmt_info, ncopies,
7257 memory_access_type, vls_type, slp_node, cost_vec);
7258 return true;
7260 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7262 /* Transform. */
7264 ensure_base_align (dr_info);
7266 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7268 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7269 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7270 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7271 tree ptr, var, scale, vec_mask;
7272 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7273 tree mask_halfvectype = mask_vectype;
7274 edge pe = loop_preheader_edge (loop);
7275 gimple_seq seq;
7276 basic_block new_bb;
7277 enum { NARROW, NONE, WIDEN } modifier;
7278 poly_uint64 scatter_off_nunits
7279 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7281 if (known_eq (nunits, scatter_off_nunits))
7282 modifier = NONE;
7283 else if (known_eq (nunits * 2, scatter_off_nunits))
7285 modifier = WIDEN;
7287 /* Currently gathers and scatters are only supported for
7288 fixed-length vectors. */
7289 unsigned int count = scatter_off_nunits.to_constant ();
7290 vec_perm_builder sel (count, count, 1);
7291 for (i = 0; i < (unsigned int) count; ++i)
7292 sel.quick_push (i | (count / 2));
7294 vec_perm_indices indices (sel, 1, count);
7295 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7296 indices);
7297 gcc_assert (perm_mask != NULL_TREE);
7299 else if (known_eq (nunits, scatter_off_nunits * 2))
7301 modifier = NARROW;
7303 /* Currently gathers and scatters are only supported for
7304 fixed-length vectors. */
7305 unsigned int count = nunits.to_constant ();
7306 vec_perm_builder sel (count, count, 1);
7307 for (i = 0; i < (unsigned int) count; ++i)
7308 sel.quick_push (i | (count / 2));
7310 vec_perm_indices indices (sel, 2, count);
7311 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7312 gcc_assert (perm_mask != NULL_TREE);
7313 ncopies *= 2;
7315 if (mask)
7316 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7318 else
7319 gcc_unreachable ();
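/* For instance (a rough sketch): storing a V16SI value through a scatter
   builtin whose offset vector has only 8 lanes (say V8DI, i.e. 64-bit
   indices) takes the NARROW path: NCOPIES is doubled and the odd copies
   extract the high half of the data vector with the
   { 8, 9, ..., 15, 8, 9, ..., 15 } permutation built above.  */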
7321 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7322 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7323 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7324 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7325 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7326 scaletype = TREE_VALUE (arglist);
7328 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7329 && TREE_CODE (rettype) == VOID_TYPE);
7331 ptr = fold_convert (ptrtype, gs_info.base);
7332 if (!is_gimple_min_invariant (ptr))
7334 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7335 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7336 gcc_assert (!new_bb);
7339 if (mask == NULL_TREE)
7341 mask_arg = build_int_cst (masktype, -1);
7342 mask_arg = vect_init_vector (vinfo, stmt_info,
7343 mask_arg, masktype, NULL);
7346 scale = build_int_cst (scaletype, gs_info.scale);
7348 auto_vec<tree> vec_oprnds0;
7349 auto_vec<tree> vec_oprnds1;
7350 auto_vec<tree> vec_masks;
7351 if (mask)
7353 tree mask_vectype = truth_type_for (vectype);
7354 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7355 modifier == NARROW
7356 ? ncopies / 2 : ncopies,
7357 mask, &vec_masks, mask_vectype);
7359 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7360 modifier == WIDEN
7361 ? ncopies / 2 : ncopies,
7362 gs_info.offset, &vec_oprnds0);
7363 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7364 modifier == NARROW
7365 ? ncopies / 2 : ncopies,
7366 op, &vec_oprnds1);
7367 for (j = 0; j < ncopies; ++j)
7369 if (modifier == WIDEN)
7371 if (j & 1)
7372 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7373 perm_mask, stmt_info, gsi);
7374 else
7375 op = vec_oprnd0 = vec_oprnds0[j / 2];
7376 src = vec_oprnd1 = vec_oprnds1[j];
7377 if (mask)
7378 mask_op = vec_mask = vec_masks[j];
7380 else if (modifier == NARROW)
7382 if (j & 1)
7383 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7384 perm_mask, stmt_info, gsi);
7385 else
7386 src = vec_oprnd1 = vec_oprnds1[j / 2];
7387 op = vec_oprnd0 = vec_oprnds0[j];
7388 if (mask)
7389 mask_op = vec_mask = vec_masks[j / 2];
7391 else
7393 op = vec_oprnd0 = vec_oprnds0[j];
7394 src = vec_oprnd1 = vec_oprnds1[j];
7395 if (mask)
7396 mask_op = vec_mask = vec_masks[j];
7399 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7401 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7402 TYPE_VECTOR_SUBPARTS (srctype)));
7403 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7404 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7405 gassign *new_stmt
7406 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7407 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7408 src = var;
7411 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7413 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7414 TYPE_VECTOR_SUBPARTS (idxtype)));
7415 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7416 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7417 gassign *new_stmt
7418 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7419 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7420 op = var;
7423 if (mask)
7425 tree utype;
7426 mask_arg = mask_op;
7427 if (modifier == NARROW)
7429 var = vect_get_new_ssa_name (mask_halfvectype,
7430 vect_simple_var);
7431 gassign *new_stmt
7432 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7433 : VEC_UNPACK_LO_EXPR,
7434 mask_op);
7435 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7436 mask_arg = var;
7438 tree optype = TREE_TYPE (mask_arg);
7439 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7440 utype = masktype;
7441 else
7442 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7443 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7444 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7445 gassign *new_stmt
7446 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7447 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7448 mask_arg = var;
7449 if (!useless_type_conversion_p (masktype, utype))
7451 gcc_assert (TYPE_PRECISION (utype)
7452 <= TYPE_PRECISION (masktype));
7453 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7454 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7455 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7456 mask_arg = var;
7460 gcall *new_stmt
7461 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7462 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7464 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7466 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7467 return true;
7469 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7470 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7472 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7473 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7475 if (grouped_store)
7477 /* FORNOW */
7478 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7480 /* We vectorize all the stmts of the interleaving group when we
7481 reach the last stmt in the group. */
7482 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7483 < DR_GROUP_SIZE (first_stmt_info)
7484 && !slp)
7486 *vec_stmt = NULL;
7487 return true;
7490 if (slp)
7492 grouped_store = false;
7493 /* VEC_NUM is the number of vect stmts to be created for this
7494 group. */
7495 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7496 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7497 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7498 == first_stmt_info);
7499 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7500 op = vect_get_store_rhs (first_stmt_info);
7502 else
7503 /* VEC_NUM is the number of vect stmts to be created for this
7504 group. */
7505 vec_num = group_size;
7507 ref_type = get_group_alias_ptr_type (first_stmt_info);
7509 else
7510 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7512 if (dump_enabled_p ())
7513 dump_printf_loc (MSG_NOTE, vect_location,
7514 "transform store. ncopies = %d\n", ncopies);
7516 if (memory_access_type == VMAT_ELEMENTWISE
7517 || memory_access_type == VMAT_STRIDED_SLP)
7519 gimple_stmt_iterator incr_gsi;
7520 bool insert_after;
7521 gimple *incr;
7522 tree offvar;
7523 tree ivstep;
7524 tree running_off;
7525 tree stride_base, stride_step, alias_off;
7526 tree vec_oprnd;
7527 tree dr_offset;
7528 unsigned int g;
7529 /* Checked by get_load_store_type. */
7530 unsigned int const_nunits = nunits.to_constant ();
7532 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7533 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7535 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7536 stride_base
7537 = fold_build_pointer_plus
7538 (DR_BASE_ADDRESS (first_dr_info->dr),
7539 size_binop (PLUS_EXPR,
7540 convert_to_ptrofftype (dr_offset),
7541 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7542 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7544 /* For a store with a loop-invariant stride other than a power of 2
7545 (i.e. not a grouped access) like so:
7547 for (i = 0; i < n; i += stride)
7548 array[i] = ...;
7550 we generate a new induction variable and new stores from
7551 the components of the (vectorized) rhs:
7553 for (j = 0; ; j += VF*stride)
7554 vectemp = ...;
7555 tmp1 = vectemp[0];
7556 array[j] = tmp1;
7557 tmp2 = vectemp[1];
7558 array[j + stride] = tmp2;
7562 unsigned nstores = const_nunits;
7563 unsigned lnel = 1;
7564 tree ltype = elem_type;
7565 tree lvectype = vectype;
7566 if (slp)
7568 if (group_size < const_nunits
7569 && const_nunits % group_size == 0)
7571 nstores = const_nunits / group_size;
7572 lnel = group_size;
7573 ltype = build_vector_type (elem_type, group_size);
7574 lvectype = vectype;
7576 /* First check if vec_extract optab doesn't support extraction
7577 of vector elts directly. */
7578 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7579 machine_mode vmode;
7580 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7581 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7582 group_size).exists (&vmode)
7583 || (convert_optab_handler (vec_extract_optab,
7584 TYPE_MODE (vectype), vmode)
7585 == CODE_FOR_nothing))
7587 /* Try to avoid emitting an extract of vector elements
7588 by performing the extracts using an integer type of the
7589 same size, extracting from a vector of those and then
7590 re-interpreting it as the original vector type if
7591 supported. */
7592 unsigned lsize
7593 = group_size * GET_MODE_BITSIZE (elmode);
7594 unsigned int lnunits = const_nunits / group_size;
7595 /* If we can't construct such a vector fall back to
7596 element extracts from the original vector type and
7597 element size stores. */
7598 if (int_mode_for_size (lsize, 0).exists (&elmode)
7599 && VECTOR_MODE_P (TYPE_MODE (vectype))
7600 && related_vector_mode (TYPE_MODE (vectype), elmode,
7601 lnunits).exists (&vmode)
7602 && (convert_optab_handler (vec_extract_optab,
7603 vmode, elmode)
7604 != CODE_FOR_nothing))
7606 nstores = lnunits;
7607 lnel = group_size;
7608 ltype = build_nonstandard_integer_type (lsize, 1);
7609 lvectype = build_vector_type (ltype, nstores);
7611 /* Else fall back to vector extraction anyway.
7612 Fewer stores are more important than avoiding spilling
7613 of the vector we extract from. Compared to the
7614 construction case in vectorizable_load no store-forwarding
7615 issue exists here for reasonable archs. */
7618 else if (group_size >= const_nunits
7619 && group_size % const_nunits == 0)
7621 nstores = 1;
7622 lnel = const_nunits;
7623 ltype = vectype;
7624 lvectype = vectype;
7626 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7627 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7630 ivstep = stride_step;
7631 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7632 build_int_cst (TREE_TYPE (ivstep), vf));
7634 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7636 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7637 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7638 create_iv (stride_base, ivstep, NULL,
7639 loop, &incr_gsi, insert_after,
7640 &offvar, NULL);
7641 incr = gsi_stmt (incr_gsi);
7643 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7645 alias_off = build_int_cst (ref_type, 0);
7646 stmt_vec_info next_stmt_info = first_stmt_info;
7647 for (g = 0; g < group_size; g++)
7649 running_off = offvar;
7650 if (g)
7652 tree size = TYPE_SIZE_UNIT (ltype);
7653 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7654 size);
7655 tree newoff = copy_ssa_name (running_off, NULL);
7656 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7657 running_off, pos);
7658 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7659 running_off = newoff;
7661 if (!slp)
7662 op = vect_get_store_rhs (next_stmt_info);
7663 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7664 op, &vec_oprnds);
7665 unsigned int group_el = 0;
7666 unsigned HOST_WIDE_INT
7667 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7668 for (j = 0; j < ncopies; j++)
7670 vec_oprnd = vec_oprnds[j];
7671 /* Pun the vector to extract from if necessary. */
7672 if (lvectype != vectype)
7674 tree tem = make_ssa_name (lvectype);
7675 gimple *pun
7676 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7677 lvectype, vec_oprnd));
7678 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7679 vec_oprnd = tem;
7681 for (i = 0; i < nstores; i++)
7683 tree newref, newoff;
7684 gimple *incr, *assign;
7685 tree size = TYPE_SIZE (ltype);
7686 /* Extract the i'th component. */
7687 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7688 bitsize_int (i), size);
7689 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7690 size, pos);
7692 elem = force_gimple_operand_gsi (gsi, elem, true,
7693 NULL_TREE, true,
7694 GSI_SAME_STMT);
7696 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7697 group_el * elsz);
7698 newref = build2 (MEM_REF, ltype,
7699 running_off, this_off);
7700 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7702 /* And store it to *running_off. */
7703 assign = gimple_build_assign (newref, elem);
7704 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7706 group_el += lnel;
7707 if (! slp
7708 || group_el == group_size)
7710 newoff = copy_ssa_name (running_off, NULL);
7711 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7712 running_off, stride_step);
7713 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7715 running_off = newoff;
7716 group_el = 0;
7718 if (g == group_size - 1
7719 && !slp)
7721 if (j == 0 && i == 0)
7722 *vec_stmt = assign;
7723 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7727 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7728 if (slp)
7729 break;
7732 vec_oprnds.release ();
7733 return true;
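/* A rough sketch (illustrative only) of what the element-wise store path
   above emits for a non-grouped V4SI store with a runtime stride:

     vectemp = ...;                                    // vectorized RHS
     MEM[(int *)off] = BIT_FIELD_REF <vectemp, 32, 0>;
     off = off + step;                                 // step = stride in bytes
     MEM[(int *)off] = BIT_FIELD_REF <vectemp, 32, 32>;
     off = off + step;
     MEM[(int *)off] = BIT_FIELD_REF <vectemp, 32, 64>;
     off = off + step;
     MEM[(int *)off] = BIT_FIELD_REF <vectemp, 32, 96>;
     off = off + step;

   with OFF driven by the induction variable created via create_iv above,
   which advances by VF * step per vector iteration.  */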
7736 auto_vec<tree> dr_chain (group_size);
7737 oprnds.create (group_size);
7739 /* Gather-scatter accesses perform only component accesses, so alignment
7740 is irrelevant for them. */
7741 if (memory_access_type == VMAT_GATHER_SCATTER)
7742 alignment_support_scheme = dr_unaligned_supported;
7743 else
7744 alignment_support_scheme
7745 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7747 gcc_assert (alignment_support_scheme);
7748 vec_loop_masks *loop_masks
7749 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7750 ? &LOOP_VINFO_MASKS (loop_vinfo)
7751 : NULL);
7752 vec_loop_lens *loop_lens
7753 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7754 ? &LOOP_VINFO_LENS (loop_vinfo)
7755 : NULL);
7757 /* We shouldn't use the length-based approach if the loop is fully masked. */
7758 gcc_assert (!loop_lens || !loop_masks);
7760 /* Targets with store-lane instructions must not require explicit
7761 realignment. vect_supportable_dr_alignment always returns either
7762 dr_aligned or dr_unaligned_supported for masked operations. */
7763 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7764 && !mask
7765 && !loop_masks)
7766 || alignment_support_scheme == dr_aligned
7767 || alignment_support_scheme == dr_unaligned_supported);
7769 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7770 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7771 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7773 tree bump;
7774 tree vec_offset = NULL_TREE;
7775 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7777 aggr_type = NULL_TREE;
7778 bump = NULL_TREE;
7780 else if (memory_access_type == VMAT_GATHER_SCATTER)
7782 aggr_type = elem_type;
7783 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7784 &bump, &vec_offset);
7786 else
7788 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7789 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7790 else
7791 aggr_type = vectype;
7792 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7793 memory_access_type);
7796 if (mask)
7797 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7799 /* In case the vectorization factor (VF) is bigger than the number
7800 of elements that we can fit in a vectype (nunits), we have to generate
7801 more than one vector stmt, i.e. we need to "unroll" the
7802 vector stmt by a factor of VF/nunits. */
7804 /* In case of interleaving (non-unit grouped access):
7806 S1: &base + 2 = x2
7807 S2: &base = x0
7808 S3: &base + 1 = x1
7809 S4: &base + 3 = x3
7811 We create vectorized stores starting from the base address (the access of
7812 the first stmt in the chain, S2 in the above example) when the last store
7813 stmt of the chain (S4) is reached:
7815 VS1: &base = vx2
7816 VS2: &base + vec_size*1 = vx0
7817 VS3: &base + vec_size*2 = vx1
7818 VS4: &base + vec_size*3 = vx3
7820 Then permutation statements are generated:
7822 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7823 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7826 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7827 (the order of the data-refs in the output of vect_permute_store_chain
7828 corresponds to the order of scalar stmts in the interleaving chain - see
7829 the documentation of vect_permute_store_chain()).
7831 In case of both multiple types and interleaving, above vector stores and
7832 permutation stmts are created for every copy. The result vector stmts are
7833 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7834 STMT_VINFO_RELATED_STMT for the next copies.
7837 auto_vec<tree> vec_masks;
7838 tree vec_mask = NULL;
7839 auto_vec<tree> vec_offsets;
7840 auto_vec<vec<tree> > gvec_oprnds;
7841 gvec_oprnds.safe_grow_cleared (group_size, true);
7842 for (j = 0; j < ncopies; j++)
7844 gimple *new_stmt;
7845 if (j == 0)
7847 if (slp)
7849 /* Get vectorized arguments for SLP_NODE. */
7850 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
7851 op, &vec_oprnds);
7852 vec_oprnd = vec_oprnds[0];
7854 else
7856 /* For interleaved stores we collect vectorized defs for all the
7857 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7858 used as an input to vect_permute_store_chain().
7860 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7861 and OPRNDS are of size 1. */
7862 stmt_vec_info next_stmt_info = first_stmt_info;
7863 for (i = 0; i < group_size; i++)
7865 /* Since gaps are not supported for interleaved stores,
7866 DR_GROUP_SIZE is the exact number of stmts in the chain.
7867 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
7868 that there is no interleaving, DR_GROUP_SIZE is 1,
7869 and only one iteration of the loop will be executed. */
7870 op = vect_get_store_rhs (next_stmt_info);
7871 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
7872 ncopies, op, &gvec_oprnds[i]);
7873 vec_oprnd = gvec_oprnds[i][0];
7874 dr_chain.quick_push (gvec_oprnds[i][0]);
7875 oprnds.quick_push (gvec_oprnds[i][0]);
7876 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7878 if (mask)
7880 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
7881 mask, &vec_masks, mask_vectype);
7882 vec_mask = vec_masks[0];
7886 /* We should have caught mismatched types earlier. */
7887 gcc_assert (useless_type_conversion_p (vectype,
7888 TREE_TYPE (vec_oprnd)));
7889 bool simd_lane_access_p
7890 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
7891 if (simd_lane_access_p
7892 && !loop_masks
7893 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7894 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7895 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
7896 && integer_zerop (DR_INIT (first_dr_info->dr))
7897 && alias_sets_conflict_p (get_alias_set (aggr_type),
7898 get_alias_set (TREE_TYPE (ref_type))))
7900 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7901 dataref_offset = build_int_cst (ref_type, 0);
7903 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7905 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
7906 &dataref_ptr, &vec_offsets, ncopies);
7907 vec_offset = vec_offsets[0];
7909 else
7910 dataref_ptr
7911 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
7912 simd_lane_access_p ? loop : NULL,
7913 offset, &dummy, gsi, &ptr_incr,
7914 simd_lane_access_p, NULL_TREE, bump);
7916 else
7918 /* For interleaved stores we created vectorized defs for all the
7919 defs stored in OPRNDS in the previous iteration (previous copy).
7920 DR_CHAIN is then used as an input to vect_permute_store_chain().
7921 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7922 OPRNDS are of size 1. */
7923 for (i = 0; i < group_size; i++)
7925 vec_oprnd = gvec_oprnds[i][j];
7926 dr_chain[i] = gvec_oprnds[i][j];
7927 oprnds[i] = gvec_oprnds[i][j];
7929 if (mask)
7930 vec_mask = vec_masks[j];
7931 if (dataref_offset)
7932 dataref_offset
7933 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7934 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7935 vec_offset = vec_offsets[j];
7936 else
7937 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
7938 stmt_info, bump);
7941 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7943 tree vec_array;
7945 /* Get an array into which we can store the individual vectors. */
7946 vec_array = create_vector_array (vectype, vec_num);
7948 /* Invalidate the current contents of VEC_ARRAY. This should
7949 become an RTL clobber too, which prevents the vector registers
7950 from being upward-exposed. */
7951 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7953 /* Store the individual vectors into the array. */
7954 for (i = 0; i < vec_num; i++)
7956 vec_oprnd = dr_chain[i];
7957 write_vector_array (vinfo, stmt_info,
7958 gsi, vec_oprnd, vec_array, i);
7961 tree final_mask = NULL;
7962 if (loop_masks)
7963 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7964 vectype, j);
7965 if (vec_mask)
7966 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7967 vec_mask, gsi);
7969 gcall *call;
7970 if (final_mask)
7972 /* Emit:
7973 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7974 VEC_ARRAY). */
7975 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7976 tree alias_ptr = build_int_cst (ref_type, align);
7977 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7978 dataref_ptr, alias_ptr,
7979 final_mask, vec_array);
7981 else
7983 /* Emit:
7984 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7985 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7986 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7987 vec_array);
7988 gimple_call_set_lhs (call, data_ref);
7990 gimple_call_set_nothrow (call, true);
7991 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7992 new_stmt = call;
7994 /* Record that VEC_ARRAY is now dead. */
7995 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7997 else
7999 new_stmt = NULL;
8000 if (grouped_store)
8002 if (j == 0)
8003 result_chain.create (group_size);
8004 /* Permute. */
8005 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8006 gsi, &result_chain);
8009 stmt_vec_info next_stmt_info = first_stmt_info;
8010 for (i = 0; i < vec_num; i++)
8012 unsigned misalign;
8013 unsigned HOST_WIDE_INT align;
8015 tree final_mask = NULL_TREE;
8016 if (loop_masks)
8017 final_mask = vect_get_loop_mask (gsi, loop_masks,
8018 vec_num * ncopies,
8019 vectype, vec_num * j + i);
8020 if (vec_mask)
8021 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8022 vec_mask, gsi);
8024 if (memory_access_type == VMAT_GATHER_SCATTER)
8026 tree scale = size_int (gs_info.scale);
8027 gcall *call;
8028 if (loop_masks)
8029 call = gimple_build_call_internal
8030 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8031 scale, vec_oprnd, final_mask);
8032 else
8033 call = gimple_build_call_internal
8034 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8035 scale, vec_oprnd);
8036 gimple_call_set_nothrow (call, true);
8037 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8038 new_stmt = call;
8039 break;
8042 if (i > 0)
8043 /* Bump the vector pointer. */
8044 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8045 gsi, stmt_info, bump);
8047 if (slp)
8048 vec_oprnd = vec_oprnds[i];
8049 else if (grouped_store)
8050 /* For grouped stores vectorized defs are interleaved in
8051 vect_permute_store_chain(). */
8052 vec_oprnd = result_chain[i];
8054 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8055 if (aligned_access_p (first_dr_info))
8056 misalign = 0;
8057 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8059 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8060 misalign = 0;
8062 else
8063 misalign = DR_MISALIGNMENT (first_dr_info);
8064 if (dataref_offset == NULL_TREE
8065 && TREE_CODE (dataref_ptr) == SSA_NAME)
8066 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8067 misalign);
8069 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8071 tree perm_mask = perm_mask_for_reverse (vectype);
8072 tree perm_dest = vect_create_destination_var
8073 (vect_get_store_rhs (stmt_info), vectype);
8074 tree new_temp = make_ssa_name (perm_dest);
8076 /* Generate the permute statement. */
8077 gimple *perm_stmt
8078 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8079 vec_oprnd, perm_mask);
8080 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8082 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8083 vec_oprnd = new_temp;
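/* This is the negative-step case, e.g. (a sketch)

     for (i = n - 1; i >= 0; i--)
       a[i] = ...;

   where each vector operand is reversed before being stored and OFFSET
   (set to 1 - nunits above) adjusts the data-ref pointer downwards so the
   stored vector covers the intended elements.  */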
8086 /* Arguments are ready. Create the new vector stmt. */
8087 if (final_mask)
8089 align = least_bit_hwi (misalign | align);
8090 tree ptr = build_int_cst (ref_type, align);
8091 gcall *call
8092 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8093 dataref_ptr, ptr,
8094 final_mask, vec_oprnd);
8095 gimple_call_set_nothrow (call, true);
8096 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8097 new_stmt = call;
8099 else if (loop_lens)
8101 tree final_len
8102 = vect_get_loop_len (loop_vinfo, loop_lens,
8103 vec_num * ncopies, vec_num * j + i);
8104 align = least_bit_hwi (misalign | align);
8105 tree ptr = build_int_cst (ref_type, align);
8106 machine_mode vmode = TYPE_MODE (vectype);
8107 opt_machine_mode new_ovmode
8108 = get_len_load_store_mode (vmode, false);
8109 machine_mode new_vmode = new_ovmode.require ();
8110 /* Need conversion if it's wrapped with VnQI. */
8111 if (vmode != new_vmode)
8113 tree new_vtype
8114 = build_vector_type_for_mode (unsigned_intQI_type_node,
8115 new_vmode);
8116 tree var
8117 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8118 vec_oprnd
8119 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8120 gassign *new_stmt
8121 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8122 vec_oprnd);
8123 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8124 gsi);
8125 vec_oprnd = var;
8127 gcall *call
8128 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8129 ptr, final_len, vec_oprnd);
8130 gimple_call_set_nothrow (call, true);
8131 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8132 new_stmt = call;
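/* Sketch of the result (assuming the usual dump syntax for internal calls):
   for a V4SI store on a target whose length-based store exists only for the
   byte mode, the operand is first VIEW_CONVERTed to V16QI and then stored as

     .LEN_STORE (dataref_ptr, align, len, vec_);

   with LEN limiting the store to the data still left in this iteration.  */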
8134 else
8136 data_ref = fold_build2 (MEM_REF, vectype,
8137 dataref_ptr,
8138 dataref_offset
8139 ? dataref_offset
8140 : build_int_cst (ref_type, 0));
8141 if (aligned_access_p (first_dr_info))
8143 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8144 TREE_TYPE (data_ref)
8145 = build_aligned_type (TREE_TYPE (data_ref),
8146 align * BITS_PER_UNIT);
8147 else
8148 TREE_TYPE (data_ref)
8149 = build_aligned_type (TREE_TYPE (data_ref),
8150 TYPE_ALIGN (elem_type));
8151 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8152 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8153 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8156 if (slp)
8157 continue;
8159 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8160 if (!next_stmt_info)
8161 break;
8164 if (!slp)
8166 if (j == 0)
8167 *vec_stmt = new_stmt;
8168 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8172 for (i = 0; i < group_size; ++i)
8174 vec<tree> oprndsi = gvec_oprnds[i];
8175 oprndsi.release ();
8177 oprnds.release ();
8178 result_chain.release ();
8179 vec_oprnds.release ();
8181 return true;
8184 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8185 VECTOR_CST mask. No checks are made that the target platform supports the
8186 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8187 vect_gen_perm_mask_checked. */
8189 tree
8190 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8192 tree mask_type;
8194 poly_uint64 nunits = sel.length ();
8195 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8197 mask_type = build_vector_type (ssizetype, nunits);
8198 return vec_perm_indices_to_tree (mask_type, sel);
8201 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8202 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8204 tree
8205 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8207 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8208 return vect_gen_perm_mask_any (vectype, sel);
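/* Typical use (a minimal sketch, much as perm_mask_for_reverse used above
   does): build a mask that reverses a 4-lane vector:

     vec_perm_builder sel (4, 4, 1);
     for (unsigned int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);
*/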
8211 /* Given vector variables X and Y that were generated for the scalar
8212 STMT_INFO, generate instructions to permute the vector elements of X and Y
8213 using the permutation mask MASK_VEC, insert them at *GSI and return the
8214 permuted vector variable. */
8216 static tree
8217 permute_vec_elements (vec_info *vinfo,
8218 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8219 gimple_stmt_iterator *gsi)
8221 tree vectype = TREE_TYPE (x);
8222 tree perm_dest, data_ref;
8223 gimple *perm_stmt;
8225 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8226 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8227 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8228 else
8229 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8230 data_ref = make_ssa_name (perm_dest);
8232 /* Generate the permute statement. */
8233 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8234 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8236 return data_ref;
8239 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8240 inserting them on the loop's preheader edge. Returns true if we
8241 were successful in doing so (and thus STMT_INFO can then be moved),
8242 otherwise returns false. */
8244 static bool
8245 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8247 ssa_op_iter i;
8248 tree op;
8249 bool any = false;
8251 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8253 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8254 if (!gimple_nop_p (def_stmt)
8255 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8257 /* Make sure we don't need to recurse. While we could do
8258 so in simple cases, when there are more complex use webs
8259 we don't have an easy way to preserve stmt order to fulfil
8260 dependencies within them. */
8261 tree op2;
8262 ssa_op_iter i2;
8263 if (gimple_code (def_stmt) == GIMPLE_PHI)
8264 return false;
8265 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8267 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8268 if (!gimple_nop_p (def_stmt2)
8269 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8270 return false;
8272 any = true;
8276 if (!any)
8277 return true;
8279 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8281 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8282 if (!gimple_nop_p (def_stmt)
8283 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8285 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8286 gsi_remove (&gsi, false);
8287 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8291 return true;
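/* For example (a sketch): in

     for (i = 0; i < n; i++)
       a[i] = *p;

   the load *p is loop invariant; once the definitions feeding its address
   have been hoisted by the function above, the VMAT_INVARIANT code in
   vectorizable_load below emits the scalar load on the preheader edge and
   splats its result into a vector outside the loop.  */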
8294 /* vectorizable_load.
8296 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8297 that can be vectorized.
8298 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8299 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8300 Return true if STMT_INFO is vectorizable in this way. */
8302 static bool
8303 vectorizable_load (vec_info *vinfo,
8304 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8305 gimple **vec_stmt, slp_tree slp_node,
8306 stmt_vector_for_cost *cost_vec)
8308 tree scalar_dest;
8309 tree vec_dest = NULL;
8310 tree data_ref = NULL;
8311 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8312 class loop *loop = NULL;
8313 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8314 bool nested_in_vect_loop = false;
8315 tree elem_type;
8316 tree new_temp;
8317 machine_mode mode;
8318 tree dummy;
8319 tree dataref_ptr = NULL_TREE;
8320 tree dataref_offset = NULL_TREE;
8321 gimple *ptr_incr = NULL;
8322 int ncopies;
8323 int i, j;
8324 unsigned int group_size;
8325 poly_uint64 group_gap_adj;
8326 tree msq = NULL_TREE, lsq;
8327 tree offset = NULL_TREE;
8328 tree byte_offset = NULL_TREE;
8329 tree realignment_token = NULL_TREE;
8330 gphi *phi = NULL;
8331 vec<tree> dr_chain = vNULL;
8332 bool grouped_load = false;
8333 stmt_vec_info first_stmt_info;
8334 stmt_vec_info first_stmt_info_for_drptr = NULL;
8335 bool compute_in_loop = false;
8336 class loop *at_loop;
8337 int vec_num;
8338 bool slp = (slp_node != NULL);
8339 bool slp_perm = false;
8340 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8341 poly_uint64 vf;
8342 tree aggr_type;
8343 gather_scatter_info gs_info;
8344 tree ref_type;
8345 enum vect_def_type mask_dt = vect_unknown_def_type;
8347 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8348 return false;
8350 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8351 && ! vec_stmt)
8352 return false;
8354 if (!STMT_VINFO_DATA_REF (stmt_info))
8355 return false;
8357 /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8358 for unpermuted loads, but we get passed SLP_TREE_REPRESENTATIVE,
8359 which can be different when reduction chains were re-ordered.
8360 Now that we have figured out we're a dataref, reset stmt_info back to
8361 SLP_TREE_SCALAR_STMTS[0]. When we're SLP only, things should be
8362 refactored in a way that maintains the dr_vec_info pointer for the
8363 relevant access explicitly. */
8364 stmt_vec_info orig_stmt_info = stmt_info;
8365 if (slp_node)
8366 stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8368 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8369 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8371 scalar_dest = gimple_assign_lhs (assign);
8372 if (TREE_CODE (scalar_dest) != SSA_NAME)
8373 return false;
8375 tree_code code = gimple_assign_rhs_code (assign);
8376 if (code != ARRAY_REF
8377 && code != BIT_FIELD_REF
8378 && code != INDIRECT_REF
8379 && code != COMPONENT_REF
8380 && code != IMAGPART_EXPR
8381 && code != REALPART_EXPR
8382 && code != MEM_REF
8383 && TREE_CODE_CLASS (code) != tcc_declaration)
8384 return false;
8386 else
8388 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8389 if (!call || !gimple_call_internal_p (call))
8390 return false;
8392 internal_fn ifn = gimple_call_internal_fn (call);
8393 if (!internal_load_fn_p (ifn))
8394 return false;
8396 scalar_dest = gimple_call_lhs (call);
8397 if (!scalar_dest)
8398 return false;
8400 int mask_index = internal_fn_mask_index (ifn);
8401 if (mask_index >= 0)
8403 mask = gimple_call_arg (call, mask_index);
8404 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8405 &mask_vectype))
8406 return false;
8410 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8411 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8413 if (loop_vinfo)
8415 loop = LOOP_VINFO_LOOP (loop_vinfo);
8416 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8417 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8419 else
8420 vf = 1;
8422 /* Multiple types in SLP are handled by creating the appropriate number of
8423 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8424 case of SLP. */
8425 if (slp)
8426 ncopies = 1;
8427 else
8428 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8430 gcc_assert (ncopies >= 1);
8432 /* FORNOW. This restriction should be relaxed. */
8433 if (nested_in_vect_loop && ncopies > 1)
8435 if (dump_enabled_p ())
8436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8437 "multiple types in nested loop.\n");
8438 return false;
8441 /* Invalidate assumptions made by dependence analysis when vectorization
8442 on the unrolled body effectively re-orders stmts. */
8443 if (ncopies > 1
8444 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8445 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8446 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8448 if (dump_enabled_p ())
8449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8450 "cannot perform implicit CSE when unrolling "
8451 "with negative dependence distance\n");
8452 return false;
8455 elem_type = TREE_TYPE (vectype);
8456 mode = TYPE_MODE (vectype);
8458 /* FORNOW. In some cases we can vectorize even if the data type is not
8459 supported (e.g. data copies). */
8460 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8462 if (dump_enabled_p ())
8463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8464 "Aligned load, but unsupported type.\n");
8465 return false;
8468 /* Check if the load is a part of an interleaving chain. */
8469 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8471 grouped_load = true;
8472 /* FORNOW */
8473 gcc_assert (!nested_in_vect_loop);
8474 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8476 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8477 group_size = DR_GROUP_SIZE (first_stmt_info);
8479 /* Refuse non-SLP vectorization of SLP-only groups. */
8480 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8482 if (dump_enabled_p ())
8483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8484 "cannot vectorize load in non-SLP mode.\n");
8485 return false;
8488 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8490 slp_perm = true;
8492 if (!loop_vinfo)
8494 /* In BB vectorization we may not actually use a loaded vector
8495 accessing elements in excess of DR_GROUP_SIZE. */
8496 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8497 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8498 unsigned HOST_WIDE_INT nunits;
8499 unsigned j, k, maxk = 0;
8500 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8501 if (k > maxk)
8502 maxk = k;
8503 tree vectype = STMT_VINFO_VECTYPE (group_info);
8504 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8505 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8507 if (dump_enabled_p ())
8508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8509 "BB vectorization with gaps at the end of "
8510 "a load is not supported\n");
8511 return false;
8515 auto_vec<tree> tem;
8516 unsigned n_perms;
8517 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8518 true, &n_perms))
8520 if (dump_enabled_p ())
8521 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8522 vect_location,
8523 "unsupported load permutation\n");
8524 return false;
8528 /* Invalidate assumptions made by dependence analysis when vectorization
8529 on the unrolled body effectively re-orders stmts. */
8530 if (!PURE_SLP_STMT (stmt_info)
8531 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8532 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8533 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8535 if (dump_enabled_p ())
8536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8537 "cannot perform implicit CSE when performing "
8538 "group loads with negative dependence distance\n");
8539 return false;
8542 else
8543 group_size = 1;
8545 vect_memory_access_type memory_access_type;
8546 enum dr_alignment_support alignment_support_scheme;
8547 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8548 ncopies, &memory_access_type,
8549 &alignment_support_scheme, &gs_info))
8550 return false;
8552 if (mask)
8554 if (memory_access_type == VMAT_CONTIGUOUS)
8556 machine_mode vec_mode = TYPE_MODE (vectype);
8557 if (!VECTOR_MODE_P (vec_mode)
8558 || !can_vec_mask_load_store_p (vec_mode,
8559 TYPE_MODE (mask_vectype), true))
8560 return false;
8562 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8563 && memory_access_type != VMAT_GATHER_SCATTER)
8565 if (dump_enabled_p ())
8566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8567 "unsupported access type for masked load.\n");
8568 return false;
8572 if (!vec_stmt) /* transformation not required. */
8574 if (!slp)
8575 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8577 if (loop_vinfo
8578 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8579 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8580 group_size, memory_access_type,
8581 &gs_info, mask);
8583 if (dump_enabled_p ()
8584 && memory_access_type != VMAT_ELEMENTWISE
8585 && memory_access_type != VMAT_GATHER_SCATTER
8586 && alignment_support_scheme != dr_aligned)
8587 dump_printf_loc (MSG_NOTE, vect_location,
8588 "Vectorizing an unaligned access.\n");
8590 STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8591 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8592 slp_node, cost_vec);
8593 return true;
8596 if (!slp)
8597 gcc_assert (memory_access_type
8598 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8600 if (dump_enabled_p ())
8601 dump_printf_loc (MSG_NOTE, vect_location,
8602 "transform load. ncopies = %d\n", ncopies);
8604 /* Transform. */
8606 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8607 ensure_base_align (dr_info);
8609 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8611 vect_build_gather_load_calls (vinfo,
8612 stmt_info, gsi, vec_stmt, &gs_info, mask);
8613 return true;
8616 if (memory_access_type == VMAT_INVARIANT)
8618 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8619 /* If we have versioned for aliasing or the loop doesn't
8620 have any data dependencies that would preclude this,
8621 then we are sure this is a loop invariant load and
8622 thus we can insert it on the preheader edge. */
8623 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8624 && !nested_in_vect_loop
8625 && hoist_defs_of_uses (stmt_info, loop));
8626 if (hoist_p)
8628 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8629 if (dump_enabled_p ())
8630 dump_printf_loc (MSG_NOTE, vect_location,
8631 "hoisting out of the vectorized loop: %G", stmt);
8632 scalar_dest = copy_ssa_name (scalar_dest);
8633 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8634 gsi_insert_on_edge_immediate
8635 (loop_preheader_edge (loop),
8636 gimple_build_assign (scalar_dest, rhs));
8638 /* These copies are all equivalent, but currently the representation
8639 requires a separate STMT_VINFO_VEC_STMT for each one. */
8640 gimple_stmt_iterator gsi2 = *gsi;
8641 gsi_next (&gsi2);
8642 for (j = 0; j < ncopies; j++)
8644 if (hoist_p)
8645 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8646 vectype, NULL);
8647 else
8648 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8649 vectype, &gsi2);
8650 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8651 if (slp)
8652 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8653 else
8655 if (j == 0)
8656 *vec_stmt = new_stmt;
8657 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8660 return true;
8663 if (memory_access_type == VMAT_ELEMENTWISE
8664 || memory_access_type == VMAT_STRIDED_SLP)
8666 gimple_stmt_iterator incr_gsi;
8667 bool insert_after;
8668 tree offvar;
8669 tree ivstep;
8670 tree running_off;
8671 vec<constructor_elt, va_gc> *v = NULL;
8672 tree stride_base, stride_step, alias_off;
8673 /* Checked by get_load_store_type. */
8674 unsigned int const_nunits = nunits.to_constant ();
8675 unsigned HOST_WIDE_INT cst_offset = 0;
8676 tree dr_offset;
8678 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8679 gcc_assert (!nested_in_vect_loop);
8681 if (grouped_load)
8683 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8684 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8686 else
8688 first_stmt_info = stmt_info;
8689 first_dr_info = dr_info;
8691 if (slp && grouped_load)
8693 group_size = DR_GROUP_SIZE (first_stmt_info);
8694 ref_type = get_group_alias_ptr_type (first_stmt_info);
8696 else
8698 if (grouped_load)
8699 cst_offset
8700 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8701 * vect_get_place_in_interleaving_chain (stmt_info,
8702 first_stmt_info));
8703 group_size = 1;
8704 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8707 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8708 stride_base
8709 = fold_build_pointer_plus
8710 (DR_BASE_ADDRESS (first_dr_info->dr),
8711 size_binop (PLUS_EXPR,
8712 convert_to_ptrofftype (dr_offset),
8713 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8714 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8716 /* For a load with a loop-invariant stride other than a power of 2
8717 (i.e. not a grouped access) like so:
8719 for (i = 0; i < n; i += stride)
8720 ... = array[i];
8722 we generate a new induction variable and new accesses to
8723 form a new vector (or vectors, depending on ncopies):
8725 for (j = 0; ; j += VF*stride)
8726 tmp1 = array[j];
8727 tmp2 = array[j + stride];
8729 vectemp = {tmp1, tmp2, ...}
8732 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8733 build_int_cst (TREE_TYPE (stride_step), vf));
8735 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8737 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8738 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8739 create_iv (stride_base, ivstep, NULL,
8740 loop, &incr_gsi, insert_after,
8741 &offvar, NULL);
8743 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8745 running_off = offvar;
8746 alias_off = build_int_cst (ref_type, 0);
8747 int nloads = const_nunits;
8748 int lnel = 1;
8749 tree ltype = TREE_TYPE (vectype);
8750 tree lvectype = vectype;
8751 auto_vec<tree> dr_chain;
8752 if (memory_access_type == VMAT_STRIDED_SLP)
8754 if (group_size < const_nunits)
8756 /* First check if vec_init optab supports construction from vector
8757 elts directly. Otherwise avoid emitting a constructor of
8758 vector elements by performing the loads using an integer type
8759 of the same size, constructing a vector of those and then
8760 re-interpreting it as the original vector type. This avoids a
8761 huge runtime penalty due to the general inability to perform
8762 store forwarding from smaller stores to a larger load. */
8763 tree ptype;
8764 tree vtype
8765 = vector_vector_composition_type (vectype,
8766 const_nunits / group_size,
8767 &ptype);
8768 if (vtype != NULL_TREE)
8770 nloads = const_nunits / group_size;
8771 lnel = group_size;
8772 lvectype = vtype;
8773 ltype = ptype;
8776 else
8778 nloads = 1;
8779 lnel = const_nunits;
8780 ltype = vectype;
8782 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8784 /* Load vector(1) scalar_type if the vectype is a single-element vector. */
8785 else if (nloads == 1)
8786 ltype = vectype;
8788 if (slp)
8790 /* For SLP permutation support we need to load the whole group,
8791 not only the number of vector stmts the permutation result
8792 fits in. */
8793 if (slp_perm)
8795 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8796 variable VF. */
8797 unsigned int const_vf = vf.to_constant ();
8798 ncopies = CEIL (group_size * const_vf, const_nunits);
8799 dr_chain.create (ncopies);
8801 else
8802 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8804 unsigned int group_el = 0;
8805 unsigned HOST_WIDE_INT
8806 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8807 for (j = 0; j < ncopies; j++)
8809 if (nloads > 1)
8810 vec_alloc (v, nloads);
8811 gimple *new_stmt = NULL;
8812 for (i = 0; i < nloads; i++)
8814 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8815 group_el * elsz + cst_offset);
8816 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8817 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8818 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8819 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8820 if (nloads > 1)
8821 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8822 gimple_assign_lhs (new_stmt));
8824 group_el += lnel;
8825 if (! slp
8826 || group_el == group_size)
8828 tree newoff = copy_ssa_name (running_off);
8829 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8830 running_off, stride_step);
8831 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8833 running_off = newoff;
8834 group_el = 0;
8837 if (nloads > 1)
8839 tree vec_inv = build_constructor (lvectype, v);
8840 new_temp = vect_init_vector (vinfo, stmt_info,
8841 vec_inv, lvectype, gsi);
8842 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8843 if (lvectype != vectype)
8845 new_stmt = gimple_build_assign (make_ssa_name (vectype),
8846 VIEW_CONVERT_EXPR,
8847 build1 (VIEW_CONVERT_EXPR,
8848 vectype, new_temp));
8849 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8853 if (slp)
8855 if (slp_perm)
8856 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
8857 else
8858 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8860 else
8862 if (j == 0)
8863 *vec_stmt = new_stmt;
8864 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8867 if (slp_perm)
8869 unsigned n_perms;
8870 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
8871 false, &n_perms);
8873 return true;
8876 if (memory_access_type == VMAT_GATHER_SCATTER
8877 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8878 grouped_load = false;
8880 if (grouped_load)
8882 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8883 group_size = DR_GROUP_SIZE (first_stmt_info);
8884 /* For SLP vectorization we directly vectorize a subchain
8885 without permutation. */
8886 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8887 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8888 /* For BB vectorization always use the first stmt to base
8889 the data ref pointer on. */
8890 if (bb_vinfo)
8891 first_stmt_info_for_drptr
8892 = vect_find_first_scalar_stmt_in_slp (slp_node);
8894 /* Check if the chain of loads is already vectorized. */
8895 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
8896 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8897 ??? But we can only do so if there is exactly one
8898 as we have no way to get at the rest. Leave the CSE
8899 opportunity alone.
8900 ??? With the group load eventually participating
8901 in multiple different permutations (having multiple
8902 slp nodes which refer to the same group) the CSE
8903 would even produce wrong code. See PR56270. */
8904 && !slp)
8906 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8907 return true;
8909 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8910 group_gap_adj = 0;
8912 /* VEC_NUM is the number of vect stmts to be created for this group. */
8913 if (slp)
8915 grouped_load = false;
8916 /* If an SLP permutation is from N elements to N elements,
8917 and if one vector holds a whole number of N-element groups, we can load
8918 the inputs to the permutation in the same way as an
8919 unpermuted sequence. In other cases we need to load the
8920 whole group, not only the number of vector stmts the
8921 permutation result fits in. */
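/* For example (hypothetically), permuting a 4-element group into 4
   lanes with 4-element vectors can reuse the unpermuted loads, whereas
   a 3-lane permutation of a 3-element group with 4-element vectors
   must load whole groups and therefore takes the branch below.  */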
8922 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
8923 if (slp_perm
8924 && (group_size != scalar_lanes
8925 || !multiple_p (nunits, group_size)))
8927 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8928 variable VF; see vect_transform_slp_perm_load. */
8929 unsigned int const_vf = vf.to_constant ();
8930 unsigned int const_nunits = nunits.to_constant ();
8931 vec_num = CEIL (group_size * const_vf, const_nunits);
8932 group_gap_adj = vf * group_size - nunits * vec_num;
8934 else
8936 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8937 group_gap_adj
8938 = group_size - scalar_lanes;
8941 else
8942 vec_num = group_size;
8944 ref_type = get_group_alias_ptr_type (first_stmt_info);
8946 else
8948 first_stmt_info = stmt_info;
8949 first_dr_info = dr_info;
8950 group_size = vec_num = 1;
8951 group_gap_adj = 0;
8952 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8955 gcc_assert (alignment_support_scheme);
8956 vec_loop_masks *loop_masks
8957 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8958 ? &LOOP_VINFO_MASKS (loop_vinfo)
8959 : NULL);
8960 vec_loop_lens *loop_lens
8961 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8962 ? &LOOP_VINFO_LENS (loop_vinfo)
8963 : NULL);
8965 /* We should not use the length-based approach if the loop is fully masked. */
8966 gcc_assert (!loop_lens || !loop_masks);
8968 /* Targets with load-lane instructions must not require explicit
8969 realignment. vect_supportable_dr_alignment always returns either
8970 dr_aligned or dr_unaligned_supported for masked operations. */
8971 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8972 && !mask
8973 && !loop_masks)
8974 || alignment_support_scheme == dr_aligned
8975 || alignment_support_scheme == dr_unaligned_supported);
8977 /* In case the vectorization factor (VF) is bigger than the number
8978 of elements that we can fit in a vectype (nunits), we have to generate
8979 more than one vector stmt, i.e. we need to "unroll" the
8980 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8981 from one copy of the vector stmt to the next, in the field
8982 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8983 stages to find the correct vector defs to be used when vectorizing
8984 stmts that use the defs of the current stmt. The example below
8985 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8986 need to create 4 vectorized stmts):
8988 before vectorization:
8989 RELATED_STMT VEC_STMT
8990 S1: x = memref - -
8991 S2: z = x + 1 - -
8993 step 1: vectorize stmt S1:
8994 We first create the vector stmt VS1_0, and, as usual, record a
8995 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8996 Next, we create the vector stmt VS1_1, and record a pointer to
8997 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8998 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8999 stmts and pointers:
9000 RELATED_STMT VEC_STMT
9001 VS1_0: vx0 = memref0 VS1_1 -
9002 VS1_1: vx1 = memref1 VS1_2 -
9003 VS1_2: vx2 = memref2 VS1_3 -
9004 VS1_3: vx3 = memref3 - -
9005 S1: x = load - VS1_0
9006 S2: z = x + 1 - -
9009 /* In case of interleaving (non-unit grouped access):
9011 S1: x2 = &base + 2
9012 S2: x0 = &base
9013 S3: x1 = &base + 1
9014 S4: x3 = &base + 3
9016 Vectorized loads are created in the order of memory accesses
9017 starting from the access of the first stmt of the chain:
9019 VS1: vx0 = &base
9020 VS2: vx1 = &base + vec_size*1
9021 VS3: vx3 = &base + vec_size*2
9022 VS4: vx4 = &base + vec_size*3
9024 Then permutation statements are generated:
9026 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9027 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9030 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9031 (the order of the data-refs in the output of vect_permute_load_chain
9032 corresponds to the order of scalar stmts in the interleaving chain - see
9033 the documentation of vect_permute_load_chain()).
9034 The generation of permutation stmts and recording them in
9035 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9037 In case of both multiple types and interleaving, the vector loads and
9038 permutation stmts above are created for every copy. The result vector
9039 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9040 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9042 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9043 on a target that supports unaligned accesses (dr_unaligned_supported)
9044 we generate the following code:
9045 p = initial_addr;
9046 indx = 0;
9047 loop {
9048 p = p + indx * vectype_size;
9049 vec_dest = *(p);
9050 indx = indx + 1;
9053 Otherwise, the data reference is potentially unaligned on a target that
9054 does not support unaligned accesses (dr_explicit_realign_optimized) -
9055 then generate the following code, in which the data in each iteration is
9056 obtained by two vector loads, one from the previous iteration, and one
9057 from the current iteration:
9058 p1 = initial_addr;
9059 msq_init = *(floor(p1))
9060 p2 = initial_addr + VS - 1;
9061 realignment_token = call target_builtin;
9062 indx = 0;
9063 loop {
9064 p2 = p2 + indx * vectype_size
9065 lsq = *(floor(p2))
9066 vec_dest = realign_load (msq, lsq, realignment_token)
9067 indx = indx + 1;
9068 msq = lsq;
9069 } */
9071 /* If the misalignment remains the same throughout the execution of the
9072 loop, we can create the init_addr and permutation mask at the loop
9073 preheader. Otherwise, it needs to be created inside the loop.
9074 This can only occur when vectorizing memory accesses in the inner-loop
9075 nested within an outer-loop that is being vectorized. */
9077 if (nested_in_vect_loop
9078 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9079 GET_MODE_SIZE (TYPE_MODE (vectype))))
9081 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9082 compute_in_loop = true;
9085 bool diff_first_stmt_info
9086 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9088 if ((alignment_support_scheme == dr_explicit_realign_optimized
9089 || alignment_support_scheme == dr_explicit_realign)
9090 && !compute_in_loop)
9092 /* If we have a different first_stmt_info, we cannot set up the
9093 realignment here, since we cannot guarantee that the DR of
9094 first_stmt_info has been initialized yet; instead use the DR of
9095 first_stmt_info_for_drptr and bump by its distance from the DR of first_stmt_info, as below. */
9096 if (!diff_first_stmt_info)
9097 msq = vect_setup_realignment (vinfo,
9098 first_stmt_info, gsi, &realignment_token,
9099 alignment_support_scheme, NULL_TREE,
9100 &at_loop);
9101 if (alignment_support_scheme == dr_explicit_realign_optimized)
9103 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9104 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9105 size_one_node);
9106 gcc_assert (!first_stmt_info_for_drptr);
9109 else
9110 at_loop = loop;
9112 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9113 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9115 tree bump;
9116 tree vec_offset = NULL_TREE;
9117 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9119 aggr_type = NULL_TREE;
9120 bump = NULL_TREE;
9122 else if (memory_access_type == VMAT_GATHER_SCATTER)
9124 aggr_type = elem_type;
9125 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9126 &bump, &vec_offset);
9128 else
9130 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9131 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9132 else
9133 aggr_type = vectype;
9134 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9135 memory_access_type);
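/* In each case BUMP (when non-null) is the step by which DATAREF_PTR
   or the constant DATAREF_OFFSET is advanced below per aggregate of
   AGGR_TYPE loaded; for the gather/scatter case above the per-element
   vector offsets make such a bump unnecessary.  */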
9138 vec<tree> vec_offsets = vNULL;
9139 auto_vec<tree> vec_masks;
9140 if (mask)
9141 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9142 mask, &vec_masks, mask_vectype, NULL_TREE);
9143 tree vec_mask = NULL_TREE;
9144 poly_uint64 group_elt = 0;
9145 for (j = 0; j < ncopies; j++)
9147 /* 1. Create the vector or array pointer update chain. */
9148 if (j == 0)
9150 bool simd_lane_access_p
9151 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9152 if (simd_lane_access_p
9153 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9154 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9155 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9156 && integer_zerop (DR_INIT (first_dr_info->dr))
9157 && alias_sets_conflict_p (get_alias_set (aggr_type),
9158 get_alias_set (TREE_TYPE (ref_type)))
9159 && (alignment_support_scheme == dr_aligned
9160 || alignment_support_scheme == dr_unaligned_supported))
9162 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9163 dataref_offset = build_int_cst (ref_type, 0);
9165 else if (diff_first_stmt_info)
9167 dataref_ptr
9168 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9169 aggr_type, at_loop, offset, &dummy,
9170 gsi, &ptr_incr, simd_lane_access_p,
9171 byte_offset, bump);
9172 /* Adjust the pointer by the difference to first_stmt. */
9173 data_reference_p ptrdr
9174 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9175 tree diff
9176 = fold_convert (sizetype,
9177 size_binop (MINUS_EXPR,
9178 DR_INIT (first_dr_info->dr),
9179 DR_INIT (ptrdr)));
9180 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9181 stmt_info, diff);
9182 if (alignment_support_scheme == dr_explicit_realign)
9184 msq = vect_setup_realignment (vinfo,
9185 first_stmt_info_for_drptr, gsi,
9186 &realignment_token,
9187 alignment_support_scheme,
9188 dataref_ptr, &at_loop);
9189 gcc_assert (!compute_in_loop);
9192 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9194 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9195 &dataref_ptr, &vec_offsets, ncopies);
9196 vec_offset = vec_offsets[0];
9198 else
9199 dataref_ptr
9200 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9201 at_loop,
9202 offset, &dummy, gsi, &ptr_incr,
9203 simd_lane_access_p,
9204 byte_offset, bump);
9205 if (mask)
9206 vec_mask = vec_masks[0];
9208 else
9210 if (dataref_offset)
9211 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9212 bump);
9213 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9214 vec_offset = vec_offsets[j];
9215 else
9216 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9217 stmt_info, bump);
9218 if (mask)
9219 vec_mask = vec_masks[j];
9222 if (grouped_load || slp_perm)
9223 dr_chain.create (vec_num);
9225 gimple *new_stmt = NULL;
9226 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9228 tree vec_array;
9230 vec_array = create_vector_array (vectype, vec_num);
9232 tree final_mask = NULL_TREE;
9233 if (loop_masks)
9234 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9235 vectype, j);
9236 if (vec_mask)
9237 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9238 vec_mask, gsi);
9240 gcall *call;
9241 if (final_mask)
9243 /* Emit:
9244 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9245 VEC_MASK). */
9246 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9247 tree alias_ptr = build_int_cst (ref_type, align);
9248 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9249 dataref_ptr, alias_ptr,
9250 final_mask);
9252 else
9254 /* Emit:
9255 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9256 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9257 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9259 gimple_call_set_lhs (call, vec_array);
9260 gimple_call_set_nothrow (call, true);
9261 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9262 new_stmt = call;
9264 /* Extract each vector into an SSA_NAME. */
9265 for (i = 0; i < vec_num; i++)
9267 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9268 vec_array, i);
9269 dr_chain.quick_push (new_temp);
9272 /* Record the mapping between SSA_NAMEs and statements. */
9273 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9275 /* Record that VEC_ARRAY is now dead. */
9276 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9278 else
9280 for (i = 0; i < vec_num; i++)
9282 tree final_mask = NULL_TREE;
9283 if (loop_masks
9284 && memory_access_type != VMAT_INVARIANT)
9285 final_mask = vect_get_loop_mask (gsi, loop_masks,
9286 vec_num * ncopies,
9287 vectype, vec_num * j + i);
9288 if (vec_mask)
9289 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9290 vec_mask, gsi);
9292 if (i > 0)
9293 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9294 gsi, stmt_info, bump);
9296 /* 2. Create the vector-load in the loop. */
9297 switch (alignment_support_scheme)
9299 case dr_aligned:
9300 case dr_unaligned_supported:
9302 unsigned int misalign;
9303 unsigned HOST_WIDE_INT align;
9305 if (memory_access_type == VMAT_GATHER_SCATTER)
9307 tree zero = build_zero_cst (vectype);
9308 tree scale = size_int (gs_info.scale);
9309 gcall *call;
9310 if (loop_masks)
9311 call = gimple_build_call_internal
9312 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9313 vec_offset, scale, zero, final_mask);
9314 else
9315 call = gimple_build_call_internal
9316 (IFN_GATHER_LOAD, 4, dataref_ptr,
9317 vec_offset, scale, zero);
9318 gimple_call_set_nothrow (call, true);
9319 new_stmt = call;
9320 data_ref = NULL_TREE;
9321 break;
9324 align =
9325 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9326 if (alignment_support_scheme == dr_aligned)
9328 gcc_assert (aligned_access_p (first_dr_info));
9329 misalign = 0;
9331 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9333 align = dr_alignment
9334 (vect_dr_behavior (vinfo, first_dr_info));
9335 misalign = 0;
9337 else
9338 misalign = DR_MISALIGNMENT (first_dr_info);
9339 if (dataref_offset == NULL_TREE
9340 && TREE_CODE (dataref_ptr) == SSA_NAME)
9341 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9342 align, misalign);
9344 if (final_mask)
9346 align = least_bit_hwi (misalign | align);
9347 tree ptr = build_int_cst (ref_type, align);
9348 gcall *call
9349 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9350 dataref_ptr, ptr,
9351 final_mask);
9352 gimple_call_set_nothrow (call, true);
9353 new_stmt = call;
9354 data_ref = NULL_TREE;
9356 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9358 tree final_len
9359 = vect_get_loop_len (loop_vinfo, loop_lens,
9360 vec_num * ncopies,
9361 vec_num * j + i);
9362 align = least_bit_hwi (misalign | align);
9363 tree ptr = build_int_cst (ref_type, align);
9364 gcall *call
9365 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9366 dataref_ptr, ptr,
9367 final_len);
9368 gimple_call_set_nothrow (call, true);
9369 new_stmt = call;
9370 data_ref = NULL_TREE;
9372 /* The IFN_LEN_LOAD built above may use a VnQI (byte) vector mode; if so, VIEW_CONVERT the result back to VECTYPE. */
9373 machine_mode vmode = TYPE_MODE (vectype);
9374 opt_machine_mode new_ovmode
9375 = get_len_load_store_mode (vmode, true);
9376 machine_mode new_vmode = new_ovmode.require ();
9377 if (vmode != new_vmode)
9379 tree qi_type = unsigned_intQI_type_node;
9380 tree new_vtype
9381 = build_vector_type_for_mode (qi_type, new_vmode);
9382 tree var = vect_get_new_ssa_name (new_vtype,
9383 vect_simple_var);
9384 gimple_set_lhs (call, var);
9385 vect_finish_stmt_generation (vinfo, stmt_info, call,
9386 gsi);
9387 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9388 new_stmt
9389 = gimple_build_assign (vec_dest,
9390 VIEW_CONVERT_EXPR, op);
9393 else
9395 tree ltype = vectype;
9396 tree new_vtype = NULL_TREE;
9397 unsigned HOST_WIDE_INT gap
9398 = DR_GROUP_GAP (first_stmt_info);
9399 unsigned int vect_align
9400 = vect_known_alignment_in_bytes (first_dr_info);
9401 unsigned int scalar_dr_size
9402 = vect_get_scalar_dr_size (first_dr_info);
9403 /* If there's no peeling for gaps but we have a gap
9404 with slp loads then load the lower half of the
9405 vector only. See get_group_load_store_type for
9406 when we apply this optimization. */
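/* E.g. (a hypothetical case) for a group of 4 elements with a gap of 2
   and a 4-element vectype only the first half is really accessed, so
   we load just that half using the piece type computed below and widen
   it again with a zero-padded constructor further down.  */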
9407 if (slp
9408 && loop_vinfo
9409 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9410 && gap != 0
9411 && known_eq (nunits, (group_size - gap) * 2)
9412 && known_eq (nunits, group_size)
9413 && gap >= (vect_align / scalar_dr_size))
9415 tree half_vtype;
9416 new_vtype
9417 = vector_vector_composition_type (vectype, 2,
9418 &half_vtype);
9419 if (new_vtype != NULL_TREE)
9420 ltype = half_vtype;
9422 tree offset
9423 = (dataref_offset ? dataref_offset
9424 : build_int_cst (ref_type, 0));
9425 if (ltype != vectype
9426 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9428 unsigned HOST_WIDE_INT gap_offset
9429 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9430 tree gapcst = build_int_cst (ref_type, gap_offset);
9431 offset = size_binop (PLUS_EXPR, offset, gapcst);
9433 data_ref
9434 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9435 if (alignment_support_scheme == dr_aligned)
9437 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9438 TREE_TYPE (data_ref)
9439 = build_aligned_type (TREE_TYPE (data_ref),
9440 align * BITS_PER_UNIT);
9441 else
9442 TREE_TYPE (data_ref)
9443 = build_aligned_type (TREE_TYPE (data_ref),
9444 TYPE_ALIGN (elem_type));
9445 if (ltype != vectype)
9447 vect_copy_ref_info (data_ref,
9448 DR_REF (first_dr_info->dr));
9449 tree tem = make_ssa_name (ltype);
9450 new_stmt = gimple_build_assign (tem, data_ref);
9451 vect_finish_stmt_generation (vinfo, stmt_info,
9452 new_stmt, gsi);
9453 data_ref = NULL;
9454 vec<constructor_elt, va_gc> *v;
9455 vec_alloc (v, 2);
9456 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9458 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9459 build_zero_cst (ltype));
9460 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9462 else
9464 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9465 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9466 build_zero_cst (ltype));
9468 gcc_assert (new_vtype != NULL_TREE);
9469 if (new_vtype == vectype)
9470 new_stmt = gimple_build_assign (
9471 vec_dest, build_constructor (vectype, v));
9472 else
9474 tree new_vname = make_ssa_name (new_vtype);
9475 new_stmt = gimple_build_assign (
9476 new_vname, build_constructor (new_vtype, v));
9477 vect_finish_stmt_generation (vinfo, stmt_info,
9478 new_stmt, gsi);
9479 new_stmt = gimple_build_assign (
9480 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9481 new_vname));
9485 break;
9487 case dr_explicit_realign:
9489 tree ptr, bump;
9491 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9493 if (compute_in_loop)
9494 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9495 &realignment_token,
9496 dr_explicit_realign,
9497 dataref_ptr, NULL);
9499 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9500 ptr = copy_ssa_name (dataref_ptr);
9501 else
9502 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9503 // For explicit realign the target alignment should be
9504 // known at compile time.
9505 unsigned HOST_WIDE_INT align =
9506 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9507 new_stmt = gimple_build_assign
9508 (ptr, BIT_AND_EXPR, dataref_ptr,
9509 build_int_cst
9510 (TREE_TYPE (dataref_ptr),
9511 -(HOST_WIDE_INT) align));
9512 vect_finish_stmt_generation (vinfo, stmt_info,
9513 new_stmt, gsi);
9514 data_ref
9515 = build2 (MEM_REF, vectype, ptr,
9516 build_int_cst (ref_type, 0));
9517 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9518 vec_dest = vect_create_destination_var (scalar_dest,
9519 vectype);
9520 new_stmt = gimple_build_assign (vec_dest, data_ref);
9521 new_temp = make_ssa_name (vec_dest, new_stmt);
9522 gimple_assign_set_lhs (new_stmt, new_temp);
9523 gimple_move_vops (new_stmt, stmt_info->stmt);
9524 vect_finish_stmt_generation (vinfo, stmt_info,
9525 new_stmt, gsi);
9526 msq = new_temp;
9528 bump = size_binop (MULT_EXPR, vs,
9529 TYPE_SIZE_UNIT (elem_type));
9530 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9531 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9532 stmt_info, bump);
9533 new_stmt = gimple_build_assign
9534 (NULL_TREE, BIT_AND_EXPR, ptr,
9535 build_int_cst
9536 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9537 ptr = copy_ssa_name (ptr, new_stmt);
9538 gimple_assign_set_lhs (new_stmt, ptr);
9539 vect_finish_stmt_generation (vinfo, stmt_info,
9540 new_stmt, gsi);
9541 data_ref
9542 = build2 (MEM_REF, vectype, ptr,
9543 build_int_cst (ref_type, 0));
9544 break;
9546 case dr_explicit_realign_optimized:
9548 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9549 new_temp = copy_ssa_name (dataref_ptr);
9550 else
9551 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9552 // We should only be doing this if we know the target
9553 // alignment at compile time.
9554 unsigned HOST_WIDE_INT align =
9555 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9556 new_stmt = gimple_build_assign
9557 (new_temp, BIT_AND_EXPR, dataref_ptr,
9558 build_int_cst (TREE_TYPE (dataref_ptr),
9559 -(HOST_WIDE_INT) align));
9560 vect_finish_stmt_generation (vinfo, stmt_info,
9561 new_stmt, gsi);
9562 data_ref
9563 = build2 (MEM_REF, vectype, new_temp,
9564 build_int_cst (ref_type, 0));
9565 break;
9567 default:
9568 gcc_unreachable ();
9570 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9571 /* DATA_REF is null if we've already built the statement. */
9572 if (data_ref)
9574 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9575 new_stmt = gimple_build_assign (vec_dest, data_ref);
9577 new_temp = make_ssa_name (vec_dest, new_stmt);
9578 gimple_set_lhs (new_stmt, new_temp);
9579 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9581 /* 3. Handle explicit realignment if necessary/supported.
9582 Create in loop:
9583 vec_dest = realign_load (msq, lsq, realignment_token) */
9584 if (alignment_support_scheme == dr_explicit_realign_optimized
9585 || alignment_support_scheme == dr_explicit_realign)
9587 lsq = gimple_assign_lhs (new_stmt);
9588 if (!realignment_token)
9589 realignment_token = dataref_ptr;
9590 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9591 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9592 msq, lsq, realignment_token);
9593 new_temp = make_ssa_name (vec_dest, new_stmt);
9594 gimple_assign_set_lhs (new_stmt, new_temp);
9595 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9597 if (alignment_support_scheme == dr_explicit_realign_optimized)
9599 gcc_assert (phi);
9600 if (i == vec_num - 1 && j == ncopies - 1)
9601 add_phi_arg (phi, lsq,
9602 loop_latch_edge (containing_loop),
9603 UNKNOWN_LOCATION);
9604 msq = lsq;
9608 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9610 tree perm_mask = perm_mask_for_reverse (vectype);
9611 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9612 perm_mask, stmt_info, gsi);
9613 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9616 /* Collect vector loads and later create their permutation in
9617 vect_transform_grouped_load (). */
9618 if (grouped_load || slp_perm)
9619 dr_chain.quick_push (new_temp);
9621 /* Store vector loads in the corresponding SLP_NODE. */
9622 if (slp && !slp_perm)
9623 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9625 /* With an SLP permutation we load the gaps as well; without
9626 one we need to skip the gaps once we have fully loaded
9627 all the elements. group_gap_adj is DR_GROUP_SIZE here. */
9628 group_elt += nunits;
9629 if (maybe_ne (group_gap_adj, 0U)
9630 && !slp_perm
9631 && known_eq (group_elt, group_size - group_gap_adj))
9633 poly_wide_int bump_val
9634 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9635 * group_gap_adj);
9636 tree bump = wide_int_to_tree (sizetype, bump_val);
9637 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9638 gsi, stmt_info, bump);
9639 group_elt = 0;
9642 /* Bump the vector pointer to account for a gap or for excess
9643 elements loaded for a permuted SLP load. */
9644 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9646 poly_wide_int bump_val
9647 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9648 * group_gap_adj);
9649 tree bump = wide_int_to_tree (sizetype, bump_val);
9650 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9651 stmt_info, bump);
9655 if (slp && !slp_perm)
9656 continue;
9658 if (slp_perm)
9660 unsigned n_perms;
9661 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9662 gsi, vf, false, &n_perms);
9663 gcc_assert (ok);
9665 else
9667 if (grouped_load)
9669 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9670 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9671 group_size, gsi);
9672 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9674 else
9676 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9679 dr_chain.release ();
9681 if (!slp)
9682 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9684 return true;
9687 /* Function vect_is_simple_cond.
9689 Input:
9690 LOOP - the loop that is being vectorized.
9691 COND - Condition that is checked for simple use.
9693 Output:
9694 *COMP_VECTYPE - the vector type for the comparison.
9695 *DTS - The def types for the arguments of the comparison.
9697 Returns whether a COND can be vectorized. Checks whether
9698 condition operands are supportable using vect_is_simple_use. */
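/* Informally: a condition that is already a boolean SSA name, say
   _1 ? x : y, takes the mask path below, while a comparison such as
   a_2 < b_3 ? x : y has each operand checked with vect_is_simple_use
   and may fall back to an invariant comparison type.  */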
9700 static bool
9701 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9702 slp_tree slp_node, tree *comp_vectype,
9703 enum vect_def_type *dts, tree vectype)
9705 tree lhs, rhs;
9706 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9707 slp_tree slp_op;
9709 /* Mask case. */
9710 if (TREE_CODE (cond) == SSA_NAME
9711 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9713 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9714 &slp_op, &dts[0], comp_vectype)
9715 || !*comp_vectype
9716 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9717 return false;
9718 return true;
9721 if (!COMPARISON_CLASS_P (cond))
9722 return false;
9724 lhs = TREE_OPERAND (cond, 0);
9725 rhs = TREE_OPERAND (cond, 1);
9727 if (TREE_CODE (lhs) == SSA_NAME)
9729 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9730 &lhs, &slp_op, &dts[0], &vectype1))
9731 return false;
9733 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9734 || TREE_CODE (lhs) == FIXED_CST)
9735 dts[0] = vect_constant_def;
9736 else
9737 return false;
9739 if (TREE_CODE (rhs) == SSA_NAME)
9741 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9742 &rhs, &slp_op, &dts[1], &vectype2))
9743 return false;
9745 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9746 || TREE_CODE (rhs) == FIXED_CST)
9747 dts[1] = vect_constant_def;
9748 else
9749 return false;
9751 if (vectype1 && vectype2
9752 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9753 TYPE_VECTOR_SUBPARTS (vectype2)))
9754 return false;
9756 *comp_vectype = vectype1 ? vectype1 : vectype2;
9757 /* Invariant comparison. */
9758 if (! *comp_vectype)
9760 tree scalar_type = TREE_TYPE (lhs);
9761 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9762 *comp_vectype = truth_type_for (vectype);
9763 else
9765 /* If we can widen the comparison to match vectype do so. */
9766 if (INTEGRAL_TYPE_P (scalar_type)
9767 && !slp_node
9768 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9769 TYPE_SIZE (TREE_TYPE (vectype))))
9770 scalar_type = build_nonstandard_integer_type
9771 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
9772 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9773 slp_node);
9777 return true;
9780 /* vectorizable_condition.
9782 Check if STMT_INFO is conditional modify expression that can be vectorized.
9783 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9784 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9785 at GSI.
9787 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9789 Return true if STMT_INFO is vectorizable in this way. */
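/* Informal sketch of the transform: a COND_EXPR like
   x = a_1 < b_2 ? c_3 : d_4 becomes a vector comparison feeding a
   VEC_COND_EXPR; for an EXTRACT_LAST reduction it is emitted as an
   IFN_FOLD_EXTRACT_LAST call instead, and in a fully-masked loop the
   comparison result is first ANDed with the loop mask.  */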
9791 static bool
9792 vectorizable_condition (vec_info *vinfo,
9793 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9794 gimple **vec_stmt,
9795 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9797 tree scalar_dest = NULL_TREE;
9798 tree vec_dest = NULL_TREE;
9799 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9800 tree then_clause, else_clause;
9801 tree comp_vectype = NULL_TREE;
9802 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9803 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9804 tree vec_compare;
9805 tree new_temp;
9806 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9807 enum vect_def_type dts[4]
9808 = {vect_unknown_def_type, vect_unknown_def_type,
9809 vect_unknown_def_type, vect_unknown_def_type};
9810 int ndts = 4;
9811 int ncopies;
9812 int vec_num;
9813 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9814 int i;
9815 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9816 vec<tree> vec_oprnds0 = vNULL;
9817 vec<tree> vec_oprnds1 = vNULL;
9818 vec<tree> vec_oprnds2 = vNULL;
9819 vec<tree> vec_oprnds3 = vNULL;
9820 tree vec_cmp_type;
9821 bool masked = false;
9823 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9824 return false;
9826 /* Is vectorizable conditional operation? */
9827 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9828 if (!stmt)
9829 return false;
9831 code = gimple_assign_rhs_code (stmt);
9832 if (code != COND_EXPR)
9833 return false;
9835 stmt_vec_info reduc_info = NULL;
9836 int reduc_index = -1;
9837 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9838 bool for_reduction
9839 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9840 if (for_reduction)
9842 if (STMT_SLP_TYPE (stmt_info))
9843 return false;
9844 reduc_info = info_for_reduction (vinfo, stmt_info);
9845 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9846 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9847 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9848 || reduc_index != -1);
9850 else
9852 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9853 return false;
9856 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9857 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9859 if (slp_node)
9861 ncopies = 1;
9862 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9864 else
9866 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9867 vec_num = 1;
9870 gcc_assert (ncopies >= 1);
9871 if (for_reduction && ncopies > 1)
9872 return false; /* FORNOW */
9874 cond_expr = gimple_assign_rhs1 (stmt);
9876 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
9877 &comp_vectype, &dts[0], vectype)
9878 || !comp_vectype)
9879 return false;
9881 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
9882 slp_tree then_slp_node, else_slp_node;
9883 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
9884 &then_clause, &then_slp_node, &dts[2], &vectype1))
9885 return false;
9886 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
9887 &else_clause, &else_slp_node, &dts[3], &vectype2))
9888 return false;
9890 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9891 return false;
9893 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9894 return false;
9896 masked = !COMPARISON_CLASS_P (cond_expr);
9897 vec_cmp_type = truth_type_for (comp_vectype);
9899 if (vec_cmp_type == NULL_TREE)
9900 return false;
9902 cond_code = TREE_CODE (cond_expr);
9903 if (!masked)
9905 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9906 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9909 /* For conditional reductions, the "then" value needs to be the candidate
9910 value calculated by this iteration while the "else" value needs to be
9911 the result carried over from previous iterations. If the COND_EXPR
9912 is the other way around, we need to swap it. */
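/* That is, given (hypothetically) r_4 = _1 ? r_3 : i_2 where r_3 is
   the value carried from previous iterations, we invert the comparison
   (or mark the mask for inversion) and swap the clauses so that the
   carried value ends up in the "else" position.  */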
9913 bool must_invert_cmp_result = false;
9914 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
9916 if (masked)
9917 must_invert_cmp_result = true;
9918 else
9920 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
9921 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
9922 if (new_code == ERROR_MARK)
9923 must_invert_cmp_result = true;
9924 else
9926 cond_code = new_code;
9927 /* Make sure we don't accidentally use the old condition. */
9928 cond_expr = NULL_TREE;
9931 std::swap (then_clause, else_clause);
9934 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
9936 /* Boolean values may have another representation in vectors
9937 and therefore we prefer bit operations over comparison for
9938 them (which also works for scalar masks). We store opcodes
9939 to use in bitop1 and bitop2. Statement is vectorized as
9940 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9941 depending on bitop1 and bitop2 arity. */
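/* For instance, on vector booleans a GT_EXPR a > b is emitted as
   a & ~b, while an EQ_EXPR becomes a ^ b with the THEN/ELSE clauses
   swapped instead of materializing the trailing BIT_NOT; see the
   transform code below.  */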
9942 switch (cond_code)
9944 case GT_EXPR:
9945 bitop1 = BIT_NOT_EXPR;
9946 bitop2 = BIT_AND_EXPR;
9947 break;
9948 case GE_EXPR:
9949 bitop1 = BIT_NOT_EXPR;
9950 bitop2 = BIT_IOR_EXPR;
9951 break;
9952 case LT_EXPR:
9953 bitop1 = BIT_NOT_EXPR;
9954 bitop2 = BIT_AND_EXPR;
9955 std::swap (cond_expr0, cond_expr1);
9956 break;
9957 case LE_EXPR:
9958 bitop1 = BIT_NOT_EXPR;
9959 bitop2 = BIT_IOR_EXPR;
9960 std::swap (cond_expr0, cond_expr1);
9961 break;
9962 case NE_EXPR:
9963 bitop1 = BIT_XOR_EXPR;
9964 break;
9965 case EQ_EXPR:
9966 bitop1 = BIT_XOR_EXPR;
9967 bitop2 = BIT_NOT_EXPR;
9968 break;
9969 default:
9970 return false;
9972 cond_code = SSA_NAME;
9975 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
9976 && reduction_type == EXTRACT_LAST_REDUCTION
9977 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
9979 if (dump_enabled_p ())
9980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9981 "reduction comparison operation not supported.\n");
9982 return false;
9985 if (!vec_stmt)
9987 if (bitop1 != NOP_EXPR)
9989 machine_mode mode = TYPE_MODE (comp_vectype);
9990 optab optab;
9992 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
9993 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9994 return false;
9996 if (bitop2 != NOP_EXPR)
9998 optab = optab_for_tree_code (bitop2, comp_vectype,
9999 optab_default);
10000 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10001 return false;
10005 vect_cost_for_stmt kind = vector_stmt;
10006 if (reduction_type == EXTRACT_LAST_REDUCTION)
10007 /* Count one reduction-like operation per vector. */
10008 kind = vec_to_scalar;
10009 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10010 return false;
10012 if (slp_node
10013 && (!vect_maybe_update_slp_op_vectype
10014 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10015 || (op_adjust == 1
10016 && !vect_maybe_update_slp_op_vectype
10017 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10018 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10019 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10021 if (dump_enabled_p ())
10022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10023 "incompatible vector types for invariants\n");
10024 return false;
10027 if (loop_vinfo && for_reduction
10028 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10030 if (reduction_type == EXTRACT_LAST_REDUCTION)
10031 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10032 ncopies * vec_num, vectype, NULL);
10033 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10034 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10036 if (dump_enabled_p ())
10037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10038 "conditional reduction prevents the use"
10039 " of partial vectors.\n");
10040 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10044 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10045 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10046 cost_vec, kind);
10047 return true;
10050 /* Transform. */
10052 if (!slp_node)
10054 vec_oprnds0.create (1);
10055 vec_oprnds1.create (1);
10056 vec_oprnds2.create (1);
10057 vec_oprnds3.create (1);
10060 /* Handle def. */
10061 scalar_dest = gimple_assign_lhs (stmt);
10062 if (reduction_type != EXTRACT_LAST_REDUCTION)
10063 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10065 bool swap_cond_operands = false;
10067 /* See whether another part of the vectorized code applies a loop
10068 mask to the condition, or to its inverse. */
10070 vec_loop_masks *masks = NULL;
10071 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10073 if (reduction_type == EXTRACT_LAST_REDUCTION)
10074 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10075 else
10077 scalar_cond_masked_key cond (cond_expr, ncopies);
10078 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10079 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10080 else
10082 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10083 cond.code = invert_tree_comparison (cond.code, honor_nans);
10084 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10086 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10087 cond_code = cond.code;
10088 swap_cond_operands = true;
10094 /* Handle cond expr. */
10095 if (masked)
10096 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10097 cond_expr, &vec_oprnds0, comp_vectype,
10098 then_clause, &vec_oprnds2, vectype,
10099 reduction_type != EXTRACT_LAST_REDUCTION
10100 ? else_clause : NULL, &vec_oprnds3, vectype);
10101 else
10102 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10103 cond_expr0, &vec_oprnds0, comp_vectype,
10104 cond_expr1, &vec_oprnds1, comp_vectype,
10105 then_clause, &vec_oprnds2, vectype,
10106 reduction_type != EXTRACT_LAST_REDUCTION
10107 ? else_clause : NULL, &vec_oprnds3, vectype);
10109 /* Arguments are ready. Create the new vector stmt. */
10110 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10112 vec_then_clause = vec_oprnds2[i];
10113 if (reduction_type != EXTRACT_LAST_REDUCTION)
10114 vec_else_clause = vec_oprnds3[i];
10116 if (swap_cond_operands)
10117 std::swap (vec_then_clause, vec_else_clause);
10119 if (masked)
10120 vec_compare = vec_cond_lhs;
10121 else
10123 vec_cond_rhs = vec_oprnds1[i];
10124 if (bitop1 == NOP_EXPR)
10126 gimple_seq stmts = NULL;
10127 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10128 vec_cond_lhs, vec_cond_rhs);
10129 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10131 else
10133 new_temp = make_ssa_name (vec_cmp_type);
10134 gassign *new_stmt;
10135 if (bitop1 == BIT_NOT_EXPR)
10136 new_stmt = gimple_build_assign (new_temp, bitop1,
10137 vec_cond_rhs);
10138 else
10139 new_stmt
10140 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10141 vec_cond_rhs);
10142 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10143 if (bitop2 == NOP_EXPR)
10144 vec_compare = new_temp;
10145 else if (bitop2 == BIT_NOT_EXPR)
10147 /* Instead of doing ~x ? y : z do x ? z : y. */
10148 vec_compare = new_temp;
10149 std::swap (vec_then_clause, vec_else_clause);
10151 else
10153 vec_compare = make_ssa_name (vec_cmp_type);
10154 new_stmt
10155 = gimple_build_assign (vec_compare, bitop2,
10156 vec_cond_lhs, new_temp);
10157 vect_finish_stmt_generation (vinfo, stmt_info,
10158 new_stmt, gsi);
10163 /* If we decided to apply a loop mask to the result of the vector
10164 comparison, AND the comparison with the mask now. Later passes
10165 should then be able to reuse the AND results between multiple
10166 vector statements.
10168 For example:
10169 for (int i = 0; i < 100; ++i)
10170 x[i] = y[i] ? z[i] : 10;
10172 results in following optimized GIMPLE:
10174 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10175 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10176 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10177 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10178 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10179 vect_iftmp.11_47, { 10, ... }>;
10181 instead of using masked and unmasked forms of
10182 vec != { 0, ... } (masked in the MASK_LOAD,
10183 unmasked in the VEC_COND_EXPR). */
10185 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10186 in cases where that's necessary. */
10188 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10190 if (!is_gimple_val (vec_compare))
10192 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10193 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10194 vec_compare);
10195 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10196 vec_compare = vec_compare_name;
10199 if (must_invert_cmp_result)
10201 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10202 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10203 BIT_NOT_EXPR,
10204 vec_compare);
10205 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10206 vec_compare = vec_compare_name;
10209 if (masks)
10211 unsigned vec_num = vec_oprnds0.length ();
10212 tree loop_mask
10213 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10214 vectype, i);
10215 tree tmp2 = make_ssa_name (vec_cmp_type);
10216 gassign *g
10217 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10218 loop_mask);
10219 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10220 vec_compare = tmp2;
10224 gimple *new_stmt;
10225 if (reduction_type == EXTRACT_LAST_REDUCTION)
10227 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10228 tree lhs = gimple_get_lhs (old_stmt);
10229 new_stmt = gimple_build_call_internal
10230 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10231 vec_then_clause);
10232 gimple_call_set_lhs (new_stmt, lhs);
10233 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10234 if (old_stmt == gsi_stmt (*gsi))
10235 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10236 else
10238 /* In this case we're moving the definition to later in the
10239 block. That doesn't matter because the only uses of the
10240 lhs are in phi statements. */
10241 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10242 gsi_remove (&old_gsi, true);
10243 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10246 else
10248 new_temp = make_ssa_name (vec_dest);
10249 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10250 vec_then_clause, vec_else_clause);
10251 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10253 if (slp_node)
10254 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10255 else
10256 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10259 if (!slp_node)
10260 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10262 vec_oprnds0.release ();
10263 vec_oprnds1.release ();
10264 vec_oprnds2.release ();
10265 vec_oprnds3.release ();
10267 return true;
10270 /* vectorizable_comparison.
10272 Check if STMT_INFO is comparison expression that can be vectorized.
10273 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10274 comparison, put it in VEC_STMT, and insert it at GSI.
10276 Return true if STMT_INFO is vectorizable in this way. */
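/* Informally: a scalar comparison such as b_1 = x_2 < y_3 is turned
   into a vector comparison that produces a vector boolean; when both
   operands are themselves vector booleans the comparison is open-coded
   with the bit operations chosen below.  */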
10278 static bool
10279 vectorizable_comparison (vec_info *vinfo,
10280 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10281 gimple **vec_stmt,
10282 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10284 tree lhs, rhs1, rhs2;
10285 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10286 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10287 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10288 tree new_temp;
10289 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10290 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10291 int ndts = 2;
10292 poly_uint64 nunits;
10293 int ncopies;
10294 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10295 int i;
10296 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10297 vec<tree> vec_oprnds0 = vNULL;
10298 vec<tree> vec_oprnds1 = vNULL;
10299 tree mask_type;
10300 tree mask;
10302 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10303 return false;
10305 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10306 return false;
10308 mask_type = vectype;
10309 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10311 if (slp_node)
10312 ncopies = 1;
10313 else
10314 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10316 gcc_assert (ncopies >= 1);
10317 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10318 return false;
10320 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10321 if (!stmt)
10322 return false;
10324 code = gimple_assign_rhs_code (stmt);
10326 if (TREE_CODE_CLASS (code) != tcc_comparison)
10327 return false;
10329 slp_tree slp_rhs1, slp_rhs2;
10330 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10331 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10332 return false;
10334 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10335 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10336 return false;
10338 if (vectype1 && vectype2
10339 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10340 TYPE_VECTOR_SUBPARTS (vectype2)))
10341 return false;
10343 vectype = vectype1 ? vectype1 : vectype2;
10345 /* Invariant comparison. */
10346 if (!vectype)
10348 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10349 vectype = mask_type;
10350 else
10351 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10352 slp_node);
10353 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10354 return false;
10356 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10357 return false;
10359 /* Can't compare mask and non-mask types. */
10360 if (vectype1 && vectype2
10361 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10362 return false;
10364 /* Boolean values may have another representation in vectors
10365 and therefore we prefer bit operations over comparison for
10366 them (which also works for scalar masks). We store opcodes
10367 to use in bitop1 and bitop2. Statement is vectorized as
10368 BITOP2 (rhs1 BITOP1 rhs2) or
10369 rhs1 BITOP2 (BITOP1 rhs2)
10370 depending on bitop1 and bitop2 arity. */
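/* E.g. on vector booleans a >= b is emitted as a | ~b and a != b as
   simply a ^ b; see the bitop handling in the transform below.  */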
10371 bool swap_p = false;
10372 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10374 if (code == GT_EXPR)
10376 bitop1 = BIT_NOT_EXPR;
10377 bitop2 = BIT_AND_EXPR;
10379 else if (code == GE_EXPR)
10381 bitop1 = BIT_NOT_EXPR;
10382 bitop2 = BIT_IOR_EXPR;
10384 else if (code == LT_EXPR)
10386 bitop1 = BIT_NOT_EXPR;
10387 bitop2 = BIT_AND_EXPR;
10388 swap_p = true;
10390 else if (code == LE_EXPR)
10392 bitop1 = BIT_NOT_EXPR;
10393 bitop2 = BIT_IOR_EXPR;
10394 swap_p = true;
10396 else
10398 bitop1 = BIT_XOR_EXPR;
10399 if (code == EQ_EXPR)
10400 bitop2 = BIT_NOT_EXPR;
10404 if (!vec_stmt)
10406 if (bitop1 == NOP_EXPR)
10408 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10409 return false;
10411 else
10413 machine_mode mode = TYPE_MODE (vectype);
10414 optab optab;
10416 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10417 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10418 return false;
10420 if (bitop2 != NOP_EXPR)
10422 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10423 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10424 return false;
10428 /* Put types on constant and invariant SLP children. */
10429 if (slp_node
10430 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10431 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10433 if (dump_enabled_p ())
10434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10435 "incompatible vector types for invariants\n");
10436 return false;
10439 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10440 vect_model_simple_cost (vinfo, stmt_info,
10441 ncopies * (1 + (bitop2 != NOP_EXPR)),
10442 dts, ndts, slp_node, cost_vec);
10443 return true;
10446 /* Transform. */
10447 if (!slp_node)
10449 vec_oprnds0.create (1);
10450 vec_oprnds1.create (1);
10453 /* Handle def. */
10454 lhs = gimple_assign_lhs (stmt);
10455 mask = vect_create_destination_var (lhs, mask_type);
10457 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10458 rhs1, &vec_oprnds0, vectype,
10459 rhs2, &vec_oprnds1, vectype);
10460 if (swap_p)
10461 std::swap (vec_oprnds0, vec_oprnds1);
10463 /* Arguments are ready. Create the new vector stmt. */
10464 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10466 gimple *new_stmt;
10467 vec_rhs2 = vec_oprnds1[i];
10469 new_temp = make_ssa_name (mask);
10470 if (bitop1 == NOP_EXPR)
10472 new_stmt = gimple_build_assign (new_temp, code,
10473 vec_rhs1, vec_rhs2);
10474 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10476 else
10478 if (bitop1 == BIT_NOT_EXPR)
10479 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10480 else
10481 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10482 vec_rhs2);
10483 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10484 if (bitop2 != NOP_EXPR)
10486 tree res = make_ssa_name (mask);
10487 if (bitop2 == BIT_NOT_EXPR)
10488 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10489 else
10490 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10491 new_temp);
10492 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10495 if (slp_node)
10496 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10497 else
10498 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10501 if (!slp_node)
10502 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10504 vec_oprnds0.release ();
10505 vec_oprnds1.release ();
10507 return true;
10510 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10511 can handle all live statements in the node. Otherwise return true
10512 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10513 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10515 static bool
10516 can_vectorize_live_stmts (vec_info *vinfo,
10517 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10518 slp_tree slp_node, slp_instance slp_node_instance,
10519 bool vec_stmt_p,
10520 stmt_vector_for_cost *cost_vec)
10522 if (slp_node)
10524 stmt_vec_info slp_stmt_info;
10525 unsigned int i;
10526 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10528 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10529 && !vectorizable_live_operation (vinfo,
10530 slp_stmt_info, gsi, slp_node,
10531 slp_node_instance, i,
10532 vec_stmt_p, cost_vec))
10533 return false;
10536 else if (STMT_VINFO_LIVE_P (stmt_info)
10537 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10538 slp_node, slp_node_instance, -1,
10539 vec_stmt_p, cost_vec))
10540 return false;
10542 return true;
10545 /* Make sure the statement is vectorizable. */
10547 opt_result
10548 vect_analyze_stmt (vec_info *vinfo,
10549 stmt_vec_info stmt_info, bool *need_to_vectorize,
10550 slp_tree node, slp_instance node_instance,
10551 stmt_vector_for_cost *cost_vec)
10553 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10554 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10555 bool ok;
10556 gimple_seq pattern_def_seq;
10558 if (dump_enabled_p ())
10559 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10560 stmt_info->stmt);
10562 if (gimple_has_volatile_ops (stmt_info->stmt))
10563 return opt_result::failure_at (stmt_info->stmt,
10564 "not vectorized:"
10565 " stmt has volatile operands: %G\n",
10566 stmt_info->stmt);
10568 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10569 && node == NULL
10570 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10572 gimple_stmt_iterator si;
10574 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10576 stmt_vec_info pattern_def_stmt_info
10577 = vinfo->lookup_stmt (gsi_stmt (si));
10578 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10579 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10581 /* Analyze def stmt of STMT if it's a pattern stmt. */
10582 if (dump_enabled_p ())
10583 dump_printf_loc (MSG_NOTE, vect_location,
10584 "==> examining pattern def statement: %G",
10585 pattern_def_stmt_info->stmt);
10587 opt_result res
10588 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10589 need_to_vectorize, node, node_instance,
10590 cost_vec);
10591 if (!res)
10592 return res;
10597 /* Skip stmts that do not need to be vectorized. In loops this is expected
10598 to include:
10599 - the COND_EXPR which is the loop exit condition
10600 - any LABEL_EXPRs in the loop
10601 - computations that are used only for array indexing or loop control.
10602 In basic blocks we only analyze statements that are a part of some SLP
10603 instance, therefore, all the statements are relevant.
10605 The pattern statement needs to be analyzed instead of the original
10606 statement if the original statement is not relevant. Otherwise we
10607 analyze both statements. In basic blocks we are called from some SLP
10608 instance traversal; there we do not analyze pattern stmts instead,
10609 since the pattern stmts are already part of an SLP instance. */
10611 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10612 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10613 && !STMT_VINFO_LIVE_P (stmt_info))
10615 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10616 && pattern_stmt_info
10617 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10618 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10620 /* Analyze PATTERN_STMT instead of the original stmt. */
10621 stmt_info = pattern_stmt_info;
10622 if (dump_enabled_p ())
10623 dump_printf_loc (MSG_NOTE, vect_location,
10624 "==> examining pattern statement: %G",
10625 stmt_info->stmt);
10627 else
10629 if (dump_enabled_p ())
10630 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10632 return opt_result::success ();
10635 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10636 && node == NULL
10637 && pattern_stmt_info
10638 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10639 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10641 /* Analyze PATTERN_STMT too. */
10642 if (dump_enabled_p ())
10643 dump_printf_loc (MSG_NOTE, vect_location,
10644 "==> examining pattern statement: %G",
10645 pattern_stmt_info->stmt);
10647 opt_result res
10648 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10649 node_instance, cost_vec);
10650 if (!res)
10651 return res;
10654 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10656 case vect_internal_def:
10657 break;
10659 case vect_reduction_def:
10660 case vect_nested_cycle:
10661 gcc_assert (!bb_vinfo
10662 && (relevance == vect_used_in_outer
10663 || relevance == vect_used_in_outer_by_reduction
10664 || relevance == vect_used_by_reduction
10665 || relevance == vect_unused_in_scope
10666 || relevance == vect_used_only_live));
10667 break;
10669 case vect_induction_def:
10670 gcc_assert (!bb_vinfo);
10671 break;
10673 case vect_constant_def:
10674 case vect_external_def:
10675 case vect_unknown_def_type:
10676 default:
10677 gcc_unreachable ();
10680 if (STMT_VINFO_RELEVANT_P (stmt_info))
10682 tree type = gimple_expr_type (stmt_info->stmt);
10683 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10684 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10685 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10686 || (call && gimple_call_lhs (call) == NULL_TREE));
10687 *need_to_vectorize = true;
10690 if (PURE_SLP_STMT (stmt_info) && !node)
10692 if (dump_enabled_p ())
10693 dump_printf_loc (MSG_NOTE, vect_location,
10694 "handled only by SLP analysis\n");
10695 return opt_result::success ();
10698 ok = true;
10699 if (!bb_vinfo
10700 && (STMT_VINFO_RELEVANT_P (stmt_info)
10701 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10702 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10703 -mveclibabi= takes preference over library functions with
10704 the simd attribute. */
10705 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10706 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10707 cost_vec)
10708 || vectorizable_conversion (vinfo, stmt_info,
10709 NULL, NULL, node, cost_vec)
10710 || vectorizable_operation (vinfo, stmt_info,
10711 NULL, NULL, node, cost_vec)
10712 || vectorizable_assignment (vinfo, stmt_info,
10713 NULL, NULL, node, cost_vec)
10714 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10715 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10716 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10717 node, node_instance, cost_vec)
10718 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10719 NULL, node, cost_vec)
10720 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10721 || vectorizable_condition (vinfo, stmt_info,
10722 NULL, NULL, node, cost_vec)
10723 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10724 cost_vec)
10725 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10726 stmt_info, NULL, node));
10727 else
10729 if (bb_vinfo)
10730 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10731 || vectorizable_simd_clone_call (vinfo, stmt_info,
10732 NULL, NULL, node, cost_vec)
10733 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10734 cost_vec)
10735 || vectorizable_shift (vinfo, stmt_info,
10736 NULL, NULL, node, cost_vec)
10737 || vectorizable_operation (vinfo, stmt_info,
10738 NULL, NULL, node, cost_vec)
10739 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10740 cost_vec)
10741 || vectorizable_load (vinfo, stmt_info,
10742 NULL, NULL, node, cost_vec)
10743 || vectorizable_store (vinfo, stmt_info,
10744 NULL, NULL, node, cost_vec)
10745 || vectorizable_condition (vinfo, stmt_info,
10746 NULL, NULL, node, cost_vec)
10747 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10748 cost_vec));
10751 if (!ok)
10752 return opt_result::failure_at (stmt_info->stmt,
10753 "not vectorized:"
10754 " relevant stmt not supported: %G",
10755 stmt_info->stmt);
10757 /* Stmts that are (also) "live" (i.e. that are used out of the loop)
10758 need extra handling, except for vectorizable reductions. */
10759 if (!bb_vinfo
10760 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10761 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10762 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10763 stmt_info, NULL, node, node_instance,
10764 false, cost_vec))
10765 return opt_result::failure_at (stmt_info->stmt,
10766 "not vectorized:"
10767 " live stmt not supported: %G",
10768 stmt_info->stmt);
10770 return opt_result::success ();
10774 /* Function vect_transform_stmt.
10776 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10778 bool
10779 vect_transform_stmt (vec_info *vinfo,
10780 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10781 slp_tree slp_node, slp_instance slp_node_instance)
10783 bool is_store = false;
10784 gimple *vec_stmt = NULL;
10785 bool done;
10787 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10789 switch (STMT_VINFO_TYPE (stmt_info))
10791 case type_demotion_vec_info_type:
10792 case type_promotion_vec_info_type:
10793 case type_conversion_vec_info_type:
10794 done = vectorizable_conversion (vinfo, stmt_info,
10795 gsi, &vec_stmt, slp_node, NULL);
10796 gcc_assert (done);
10797 break;
10799 case induc_vec_info_type:
10800 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
10801 stmt_info, &vec_stmt, slp_node,
10802 NULL);
10803 gcc_assert (done);
10804 break;
10806 case shift_vec_info_type:
10807 done = vectorizable_shift (vinfo, stmt_info,
10808 gsi, &vec_stmt, slp_node, NULL);
10809 gcc_assert (done);
10810 break;
10812 case op_vec_info_type:
10813 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10814 NULL);
10815 gcc_assert (done);
10816 break;
10818 case assignment_vec_info_type:
10819 done = vectorizable_assignment (vinfo, stmt_info,
10820 gsi, &vec_stmt, slp_node, NULL);
10821 gcc_assert (done);
10822 break;
10824 case load_vec_info_type:
10825 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10826 NULL);
10827 gcc_assert (done);
10828 break;
10830 case store_vec_info_type:
10831 done = vectorizable_store (vinfo, stmt_info,
10832 gsi, &vec_stmt, slp_node, NULL);
10833 gcc_assert (done);
10834 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10836 /* In case of interleaving, the whole chain is vectorized when the
10837 last store in the chain is reached. Store stmts before the last
10838 one are skipped, and their vec_stmt_info shouldn't be freed
10839 meanwhile. */
10840 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10841 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10842 is_store = true;
10844 else
10845 is_store = true;
10846 break;
10848 case condition_vec_info_type:
10849 done = vectorizable_condition (vinfo, stmt_info,
10850 gsi, &vec_stmt, slp_node, NULL);
10851 gcc_assert (done);
10852 break;
10854 case comparison_vec_info_type:
10855 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
10856 slp_node, NULL);
10857 gcc_assert (done);
10858 break;
10860 case call_vec_info_type:
10861 done = vectorizable_call (vinfo, stmt_info,
10862 gsi, &vec_stmt, slp_node, NULL);
10863 break;
10865 case call_simd_clone_vec_info_type:
10866 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
10867 slp_node, NULL);
10868 break;
10870 case reduc_vec_info_type:
10871 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10872 gsi, &vec_stmt, slp_node);
10873 gcc_assert (done);
10874 break;
10876 case cycle_phi_info_type:
10877 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
10878 &vec_stmt, slp_node, slp_node_instance);
10879 gcc_assert (done);
10880 break;
10882 case lc_phi_info_type:
10883 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10884 stmt_info, &vec_stmt, slp_node);
10885 gcc_assert (done);
10886 break;
10888 default:
10889 if (!STMT_VINFO_LIVE_P (stmt_info))
10891 if (dump_enabled_p ())
10892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10893 "stmt not supported.\n");
10894 gcc_unreachable ();
10896 done = true;
10899 if (!slp_node && vec_stmt)
10900 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
10902 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
10903 return is_store;
10905 /* Handle stmts whose DEF is used outside the loop-nest that is
10906 being vectorized. */
10907 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
10908 slp_node_instance, true, NULL);
10909 gcc_assert (done);
10911 return false;
10915 /* Remove a group of stores (for SLP or interleaving), free their
10916 stmt_vec_info. */
10918 void
10919 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
10921 stmt_vec_info next_stmt_info = first_stmt_info;
10923 while (next_stmt_info)
10925 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10926 next_stmt_info = vect_orig_stmt (next_stmt_info);
10927 /* Free the attached stmt_vec_info and remove the stmt. */
10928 vinfo->remove_stmt (next_stmt_info);
10929 next_stmt_info = tmp;
10933 /* If NUNITS is nonzero, return a vector type that contains NUNITS
10934 elements of type SCALAR_TYPE, or null if the target doesn't support
10935 such a type.
10937 If NUNITS is zero, return a vector type that contains elements of
10938 type SCALAR_TYPE, choosing whichever vector size the target prefers.
10940 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
10941 for this vectorization region and want to "autodetect" the best choice.
10942 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
10943 and we want the new type to be interoperable with it. PREVAILING_MODE
10944 in this case can be a scalar integer mode or a vector mode; when it
10945 is a vector mode, the function acts like a tree-level version of
10946 related_vector_mode. */
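/* For example (an illustrative note, not taken from any target
   documentation): on a typical target with 128-bit vectors, passing a
   PREVAILING_MODE that is a four-element vector of 32-bit ints together
   with a 16-bit SCALAR_TYPE and NUNITS == 0 would yield an eight-element
   128-bit vector type, while a nonzero NUNITS asks for exactly that many
   elements instead.  */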
10948 tree
10949 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
10950 tree scalar_type, poly_uint64 nunits)
10952 tree orig_scalar_type = scalar_type;
10953 scalar_mode inner_mode;
10954 machine_mode simd_mode;
10955 tree vectype;
10957 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
10958 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
10959 return NULL_TREE;
10961 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
10963 /* For vector types of elements whose mode precision doesn't
10964 match their type's precision, we use an element type of mode
10965 precision. The vectorization routines will have to make sure
10966 they support the proper result truncation/extension.
10967 We also make sure to build vector types with INTEGER_TYPE
10968 component type only. */
10969 if (INTEGRAL_TYPE_P (scalar_type)
10970 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
10971 || TREE_CODE (scalar_type) != INTEGER_TYPE))
10972 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
10973 TYPE_UNSIGNED (scalar_type));
10975 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10976 When the component mode passes the above test simply use a type
10977 corresponding to that mode. The theory is that any use that
10978 would cause problems with this will disable vectorization anyway. */
10979 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
10980 && !INTEGRAL_TYPE_P (scalar_type))
10981 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
10983 /* We can't build a vector type of elements with alignment bigger than
10984 their size. */
10985 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
10986 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
10987 TYPE_UNSIGNED (scalar_type));
10989 /* If we fell back to using the mode, fail if there was
10990 no scalar type for it. */
10991 if (scalar_type == NULL_TREE)
10992 return NULL_TREE;
10994 /* If no prevailing mode was supplied, use the mode the target prefers.
10995 Otherwise lookup a vector mode based on the prevailing mode. */
10996 if (prevailing_mode == VOIDmode)
10998 gcc_assert (known_eq (nunits, 0U));
10999 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11000 if (SCALAR_INT_MODE_P (simd_mode))
11002 /* Traditional behavior is not to take the integer mode
11003 literally, but simply to use it as a way of determining
11004 the vector size. It is up to mode_for_vector to decide
11005 what the TYPE_MODE should be.
11007 Note that nunits == 1 is allowed in order to support single
11008 element vector types. */
11009 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11010 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11011 return NULL_TREE;
11014 else if (SCALAR_INT_MODE_P (prevailing_mode)
11015 || !related_vector_mode (prevailing_mode,
11016 inner_mode, nunits).exists (&simd_mode))
11018 /* Fall back to using mode_for_vector, mostly in the hope of being
11019 able to use an integer mode. */
11020 if (known_eq (nunits, 0U)
11021 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11022 return NULL_TREE;
11024 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11025 return NULL_TREE;
11028 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11030 /* In cases where the mode was chosen by mode_for_vector, check that
11031 the target actually supports the chosen mode, or that it at least
11032 allows the vector mode to be replaced by a like-sized integer. */
11033 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11034 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11035 return NULL_TREE;
11037 /* Re-attach the address-space qualifier if we canonicalized the scalar
11038 type. */
11039 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11040 return build_qualified_type
11041 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11043 return vectype;
11046 /* Function get_vectype_for_scalar_type.
11048 Returns the vector type corresponding to SCALAR_TYPE as supported
11049 by the target. If GROUP_SIZE is nonzero and we're performing BB
11050 vectorization, make sure that the number of elements in the vector
11051 is no bigger than GROUP_SIZE. */
11053 tree
11054 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11055 unsigned int group_size)
11057 /* For BB vectorization, we should always have a group size once we've
11058 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11059 are tentative requests during things like early data reference
11060 analysis and pattern recognition. */
11061 if (is_a <bb_vec_info> (vinfo))
11062 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11063 else
11064 group_size = 0;
11066 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11067 scalar_type);
11068 if (vectype && vinfo->vector_mode == VOIDmode)
11069 vinfo->vector_mode = TYPE_MODE (vectype);
11071 /* Register the natural choice of vector type, before the group size
11072 has been applied. */
11073 if (vectype)
11074 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11076 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11077 try again with an explicit number of elements. */
11078 if (vectype
11079 && group_size
11080 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11082 /* Start with the biggest number of units that fits within
11083 GROUP_SIZE and halve it until we find a valid vector type.
11084 Usually either the first attempt will succeed or all will
11085 fail (in the latter case because GROUP_SIZE is too small
11086 for the target), but it's possible that a target could have
11087 a hole between supported vector types.
11089 If GROUP_SIZE is not a power of 2, this has the effect of
11090 trying the largest power of 2 that fits within the group,
11091 even though the group is not a multiple of that vector size.
11092 The BB vectorizer will then try to carve up the group into
11093 smaller pieces. */
11094 unsigned int nunits = 1 << floor_log2 (group_size);
11097 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11098 scalar_type, nunits);
11099 nunits /= 2;
11101 while (nunits > 1 && !vectype);
11104 return vectype;
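/* A minimal standalone sketch (illustrative only, not used by the
   vectorizer) of the group-size search above: start with the largest
   power of two that fits within GROUP_SIZE and halve it until the
   hypothetical SUPPORTED_P predicate accepts a number of units.
   For a group size of 12 the candidates tried are 8, 4 and 2.  */

static unsigned int
example_pick_nunits (unsigned int group_size,
                     bool (*supported_p) (unsigned int))
{
  unsigned int nunits = 1;
  while (nunits * 2 <= group_size)
    nunits *= 2;                      /* 1 << floor_log2 (group_size).  */
  do
    {
      if (supported_p (nunits))
        return nunits;
      nunits /= 2;
    }
  while (nunits > 1);
  return 0;                           /* No suitable vector type found.  */
}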
11107 /* Return the vector type corresponding to SCALAR_TYPE as supported
11108 by the target. NODE, if nonnull, is the SLP tree node that will
11109 use the returned vector type. */
11111 tree
11112 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11114 unsigned int group_size = 0;
11115 if (node)
11116 group_size = SLP_TREE_LANES (node);
11117 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11120 /* Function get_mask_type_for_scalar_type.
11122 Returns the mask type corresponding to a result of comparison
11123 of vectors of specified SCALAR_TYPE as supported by target.
11124 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11125 make sure that the number of elements in the vector is no bigger
11126 than GROUP_SIZE. */
11128 tree
11129 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11130 unsigned int group_size)
11132 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11134 if (!vectype)
11135 return NULL;
11137 return truth_type_for (vectype);
11140 /* Function get_same_sized_vectype
11142 Returns a vector type corresponding to SCALAR_TYPE of size
11143 VECTOR_TYPE if supported by the target. */
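/* For example (illustrative only): given a four-element vector of floats
   as VECTOR_TYPE and a 32-bit integer SCALAR_TYPE, the result is a
   four-element integer vector of the same overall size, provided the
   target supports such a type.  */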
11145 tree
11146 get_same_sized_vectype (tree scalar_type, tree vector_type)
11148 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11149 return truth_type_for (vector_type);
11151 poly_uint64 nunits;
11152 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11153 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11154 return NULL_TREE;
11156 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11157 scalar_type, nunits);
11160 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11161 would not change the chosen vector modes. */
11163 bool
11164 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11166 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11167 i != vinfo->used_vector_modes.end (); ++i)
11168 if (!VECTOR_MODE_P (*i)
11169 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11170 return false;
11171 return true;
11174 /* Function vect_is_simple_use.
11176 Input:
11177 VINFO - the vect info of the loop or basic block that is being vectorized.
11178 OPERAND - operand in the loop or bb.
11179 Output:
11180 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11181 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11182 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11183 the definition could be anywhere in the function
11184 DT - the type of definition
11186 Returns whether a stmt with OPERAND can be vectorized.
11187 For loops, supportable operands are constants, loop invariants, and operands
11188 that are defined by the current iteration of the loop. Unsupportable
11189 operands are those that are defined by a previous iteration of the loop (as
11190 is the case in reduction/induction computations).
11191 For basic blocks, supportable operands are constants and bb invariants.
11192 For now, operands defined outside the basic block are not supported. */
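/* For example (illustrative only), when vectorizing

     for (i = 0; i < n; i++)
       a[i] = b[i] * x + 4;

   the constant 4 gets vect_constant_def, the loop-invariant X gets
   vect_external_def, and the SSA name holding b[i] * x, defined by a
   statement inside the loop, gets vect_internal_def.  */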
11194 bool
11195 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11196 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11198 if (def_stmt_info_out)
11199 *def_stmt_info_out = NULL;
11200 if (def_stmt_out)
11201 *def_stmt_out = NULL;
11202 *dt = vect_unknown_def_type;
11204 if (dump_enabled_p ())
11206 dump_printf_loc (MSG_NOTE, vect_location,
11207 "vect_is_simple_use: operand ");
11208 if (TREE_CODE (operand) == SSA_NAME
11209 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11210 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11211 else
11212 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11215 if (CONSTANT_CLASS_P (operand))
11216 *dt = vect_constant_def;
11217 else if (is_gimple_min_invariant (operand))
11218 *dt = vect_external_def;
11219 else if (TREE_CODE (operand) != SSA_NAME)
11220 *dt = vect_unknown_def_type;
11221 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11222 *dt = vect_external_def;
11223 else
11225 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11226 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11227 if (!stmt_vinfo)
11228 *dt = vect_external_def;
11229 else
11231 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11232 def_stmt = stmt_vinfo->stmt;
11233 switch (gimple_code (def_stmt))
11235 case GIMPLE_PHI:
11236 case GIMPLE_ASSIGN:
11237 case GIMPLE_CALL:
11238 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11239 break;
11240 default:
11241 *dt = vect_unknown_def_type;
11242 break;
11244 if (def_stmt_info_out)
11245 *def_stmt_info_out = stmt_vinfo;
11247 if (def_stmt_out)
11248 *def_stmt_out = def_stmt;
11251 if (dump_enabled_p ())
11253 dump_printf (MSG_NOTE, ", type of def: ");
11254 switch (*dt)
11256 case vect_uninitialized_def:
11257 dump_printf (MSG_NOTE, "uninitialized\n");
11258 break;
11259 case vect_constant_def:
11260 dump_printf (MSG_NOTE, "constant\n");
11261 break;
11262 case vect_external_def:
11263 dump_printf (MSG_NOTE, "external\n");
11264 break;
11265 case vect_internal_def:
11266 dump_printf (MSG_NOTE, "internal\n");
11267 break;
11268 case vect_induction_def:
11269 dump_printf (MSG_NOTE, "induction\n");
11270 break;
11271 case vect_reduction_def:
11272 dump_printf (MSG_NOTE, "reduction\n");
11273 break;
11274 case vect_double_reduction_def:
11275 dump_printf (MSG_NOTE, "double reduction\n");
11276 break;
11277 case vect_nested_cycle:
11278 dump_printf (MSG_NOTE, "nested cycle\n");
11279 break;
11280 case vect_unknown_def_type:
11281 dump_printf (MSG_NOTE, "unknown\n");
11282 break;
11286 if (*dt == vect_unknown_def_type)
11288 if (dump_enabled_p ())
11289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11290 "Unsupported pattern.\n");
11291 return false;
11294 return true;
11297 /* Function vect_is_simple_use.
11299 Same as vect_is_simple_use but also determines the vector operand
11300 type of OPERAND and stores it to *VECTYPE. If the definition of
11301 OPERAND is vect_uninitialized_def, vect_constant_def or
11302 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11303 is responsible to compute the best suited vector type for the
11304 scalar operand. */
11306 bool
11307 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11308 tree *vectype, stmt_vec_info *def_stmt_info_out,
11309 gimple **def_stmt_out)
11311 stmt_vec_info def_stmt_info;
11312 gimple *def_stmt;
11313 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11314 return false;
11316 if (def_stmt_out)
11317 *def_stmt_out = def_stmt;
11318 if (def_stmt_info_out)
11319 *def_stmt_info_out = def_stmt_info;
11321 /* Now get a vector type if the def is internal, otherwise supply
11322 NULL_TREE and leave it up to the caller to figure out a proper
11323 type for the use stmt. */
11324 if (*dt == vect_internal_def
11325 || *dt == vect_induction_def
11326 || *dt == vect_reduction_def
11327 || *dt == vect_double_reduction_def
11328 || *dt == vect_nested_cycle)
11330 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11331 gcc_assert (*vectype != NULL_TREE);
11332 if (dump_enabled_p ())
11333 dump_printf_loc (MSG_NOTE, vect_location,
11334 "vect_is_simple_use: vectype %T\n", *vectype);
11336 else if (*dt == vect_uninitialized_def
11337 || *dt == vect_constant_def
11338 || *dt == vect_external_def)
11339 *vectype = NULL_TREE;
11340 else
11341 gcc_unreachable ();
11343 return true;
11346 /* Function vect_is_simple_use.
11348 Same as vect_is_simple_use but determines the operand by operand
11349 position OPERAND from either STMT or SLP_NODE, filling in *OP
11350 and *SLP_DEF (when SLP_NODE is not NULL). */
11352 bool
11353 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11354 unsigned operand, tree *op, slp_tree *slp_def,
11355 enum vect_def_type *dt,
11356 tree *vectype, stmt_vec_info *def_stmt_info_out)
11358 if (slp_node)
11360 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11361 *slp_def = child;
11362 *vectype = SLP_TREE_VECTYPE (child);
11363 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11365 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11366 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11368 else
11370 if (def_stmt_info_out)
11371 *def_stmt_info_out = NULL;
11372 *op = SLP_TREE_SCALAR_OPS (child)[0];
11373 *dt = SLP_TREE_DEF_TYPE (child);
11374 return true;
11377 else
11379 *slp_def = NULL;
11380 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11382 if (gimple_assign_rhs_code (ass) == COND_EXPR
11383 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11385 if (operand < 2)
11386 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11387 else
11388 *op = gimple_op (ass, operand);
11390 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11391 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11392 else
11393 *op = gimple_op (ass, operand + 1);
11395 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11397 if (gimple_call_internal_p (call)
11398 && internal_store_fn_p (gimple_call_internal_fn (call)))
11399 operand = internal_fn_stored_value_index (gimple_call_internal_fn
11400 (call));
11401 *op = gimple_call_arg (call, operand);
11403 else
11404 gcc_unreachable ();
11405 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11409 /* If OP is not NULL and is external or constant, update its vector
11410 type with VECTYPE. Returns true if successful or false if not,
11411 for example when conflicting vector types are present. */
11413 bool
11414 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11416 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11417 return true;
11418 if (SLP_TREE_VECTYPE (op))
11419 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11420 SLP_TREE_VECTYPE (op) = vectype;
11421 return true;
11424 /* Function supportable_widening_operation
11426 Check whether an operation represented by the code CODE is a
11427 widening operation that is supported by the target platform in
11428 vector form (i.e., when operating on arguments of type VECTYPE_IN
11429 producing a result of type VECTYPE_OUT).
11431 Widening operations we currently support are NOP (CONVERT), FLOAT,
11432 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11433 are supported by the target platform either directly (via vector
11434 tree-codes), or via target builtins.
11436 Output:
11437 - CODE1 and CODE2 are codes of vector operations to be used when
11438 vectorizing the operation, if available.
11439 - MULTI_STEP_CVT determines the number of required intermediate steps in
11440 case of multi-step conversion (like char->short->int - in that case
11441 MULTI_STEP_CVT will be 1).
11442 - INTERM_TYPES contains the intermediate type required to perform the
11443 widening operation (short in the above example). */
11445 bool
11446 supportable_widening_operation (vec_info *vinfo,
11447 enum tree_code code, stmt_vec_info stmt_info,
11448 tree vectype_out, tree vectype_in,
11449 enum tree_code *code1, enum tree_code *code2,
11450 int *multi_step_cvt,
11451 vec<tree> *interm_types)
11453 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11454 class loop *vect_loop = NULL;
11455 machine_mode vec_mode;
11456 enum insn_code icode1, icode2;
11457 optab optab1, optab2;
11458 tree vectype = vectype_in;
11459 tree wide_vectype = vectype_out;
11460 enum tree_code c1, c2;
11461 int i;
11462 tree prev_type, intermediate_type;
11463 machine_mode intermediate_mode, prev_mode;
11464 optab optab3, optab4;
11466 *multi_step_cvt = 0;
11467 if (loop_info)
11468 vect_loop = LOOP_VINFO_LOOP (loop_info);
11470 switch (code)
11472 case WIDEN_MULT_EXPR:
11473 /* The result of a vectorized widening operation usually requires
11474 two vectors (because the widened results do not fit into one vector).
11475 The generated vector results would normally be expected to be
11476 generated in the same order as in the original scalar computation,
11477 i.e. if 8 results are generated in each vector iteration, they are
11478 to be organized as follows:
11479 vect1: [res1,res2,res3,res4],
11480 vect2: [res5,res6,res7,res8].
11482 However, in the special case that the result of the widening
11483 operation is used in a reduction computation only, the order doesn't
11484 matter (because when vectorizing a reduction we change the order of
11485 the computation). Some targets can take advantage of this and
11486 generate more efficient code. For example, targets like Altivec,
11487 that support widen_mult using a sequence of {mult_even,mult_odd}
11488 generate the following vectors:
11489 vect1: [res1,res3,res5,res7],
11490 vect2: [res2,res4,res6,res8].
11492 When vectorizing outer-loops, we execute the inner-loop sequentially
11493 (each vectorized inner-loop iteration contributes to VF outer-loop
11494 iterations in parallel). We therefore don't allow changing the
11495 order of the computation in the inner-loop during outer-loop
11496 vectorization. */
11497 /* TODO: Another case in which order doesn't *really* matter is when we
11498 widen and then contract again, e.g. (short)((int)x * y >> 8).
11499 Normally, pack_trunc performs an even/odd permute, whereas the
11500 repack from an even/odd expansion would be an interleave, which
11501 would be significantly simpler for e.g. AVX2. */
11502 /* In any case, in order to avoid duplicating the code below, recurse
11503 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11504 are properly set up for the caller. If we fail, we'll continue with
11505 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11506 if (vect_loop
11507 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11508 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11509 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11510 stmt_info, vectype_out,
11511 vectype_in, code1, code2,
11512 multi_step_cvt, interm_types))
11514 /* Elements in a vector with the vect_used_by_reduction property cannot
11515 be reordered if the use chain with this property does not have the
11516 same operation. One such example is s += a * b, where elements
11517 in a and b cannot be reordered. Here we check if the vector defined
11518 by STMT is only directly used in the reduction statement. */
11519 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11520 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11521 if (use_stmt_info
11522 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11523 return true;
11525 c1 = VEC_WIDEN_MULT_LO_EXPR;
11526 c2 = VEC_WIDEN_MULT_HI_EXPR;
11527 break;
11529 case DOT_PROD_EXPR:
11530 c1 = DOT_PROD_EXPR;
11531 c2 = DOT_PROD_EXPR;
11532 break;
11534 case SAD_EXPR:
11535 c1 = SAD_EXPR;
11536 c2 = SAD_EXPR;
11537 break;
11539 case VEC_WIDEN_MULT_EVEN_EXPR:
11540 /* Support the recursion induced just above. */
11541 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11542 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11543 break;
11545 case WIDEN_LSHIFT_EXPR:
11546 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11547 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11548 break;
11550 CASE_CONVERT:
11551 c1 = VEC_UNPACK_LO_EXPR;
11552 c2 = VEC_UNPACK_HI_EXPR;
11553 break;
11555 case FLOAT_EXPR:
11556 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11557 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11558 break;
11560 case FIX_TRUNC_EXPR:
11561 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11562 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11563 break;
11565 default:
11566 gcc_unreachable ();
11569 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11570 std::swap (c1, c2);
11572 if (code == FIX_TRUNC_EXPR)
11574 /* The signedness is determined from the output operand. */
11575 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11576 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11578 else if (CONVERT_EXPR_CODE_P (code)
11579 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11580 && VECTOR_BOOLEAN_TYPE_P (vectype)
11581 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11582 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11584 /* If the input and result modes are the same, a different optab
11585 is needed where we pass in the number of units in vectype. */
11586 optab1 = vec_unpacks_sbool_lo_optab;
11587 optab2 = vec_unpacks_sbool_hi_optab;
11589 else
11591 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11592 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11595 if (!optab1 || !optab2)
11596 return false;
11598 vec_mode = TYPE_MODE (vectype);
11599 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11600 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11601 return false;
11603 *code1 = c1;
11604 *code2 = c2;
11606 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11607 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11609 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11610 return true;
11611 /* For scalar masks we may have different boolean
11612 vector types having the same QImode. Thus we
11613 add an additional check for the number of elements. */
11614 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11615 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11616 return true;
11619 /* Check if it's a multi-step conversion that can be done using intermediate
11620 types. */
11622 prev_type = vectype;
11623 prev_mode = vec_mode;
11625 if (!CONVERT_EXPR_CODE_P (code))
11626 return false;
11628 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11629 intermediate steps in the promotion sequence. We try
11630 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11631 not. */
11632 interm_types->create (MAX_INTERM_CVT_STEPS);
11633 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11635 intermediate_mode = insn_data[icode1].operand[0].mode;
11636 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11637 intermediate_type
11638 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11639 else
11640 intermediate_type
11641 = lang_hooks.types.type_for_mode (intermediate_mode,
11642 TYPE_UNSIGNED (prev_type));
11644 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11645 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11646 && intermediate_mode == prev_mode
11647 && SCALAR_INT_MODE_P (prev_mode))
11649 /* If the input and result modes are the same, a different optab
11650 is needed where we pass in the number of units in vectype. */
11651 optab3 = vec_unpacks_sbool_lo_optab;
11652 optab4 = vec_unpacks_sbool_hi_optab;
11654 else
11656 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11657 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11660 if (!optab3 || !optab4
11661 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11662 || insn_data[icode1].operand[0].mode != intermediate_mode
11663 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11664 || insn_data[icode2].operand[0].mode != intermediate_mode
11665 || ((icode1 = optab_handler (optab3, intermediate_mode))
11666 == CODE_FOR_nothing)
11667 || ((icode2 = optab_handler (optab4, intermediate_mode))
11668 == CODE_FOR_nothing))
11669 break;
11671 interm_types->quick_push (intermediate_type);
11672 (*multi_step_cvt)++;
11674 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11675 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11677 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11678 return true;
11679 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11680 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11681 return true;
11684 prev_type = intermediate_type;
11685 prev_mode = intermediate_mode;
11688 interm_types->release ();
11689 return false;
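/* A minimal standalone sketch (illustrative only, not used by the
   vectorizer) of why the even/odd widening-multiply variant handled
   above is acceptable when the result only feeds a reduction:
   accumulating the widened products in lo/hi (original) order or in
   even/odd order produces the same sum, so the lane order may be
   changed.  */

static long
example_widen_mult_reduction (const short *a, const short *b,
                              unsigned int n, bool even_odd_order)
{
  long sum = 0;
  unsigned int i;
  if (!even_odd_order)
    for (i = 0; i < n; i++)           /* lo/hi: original lane order.  */
      sum += (long) a[i] * b[i];
  else
    {
      for (i = 0; i < n; i += 2)      /* even lanes first ...  */
        sum += (long) a[i] * b[i];
      for (i = 1; i < n; i += 2)      /* ... then odd lanes.  */
        sum += (long) a[i] * b[i];
    }
  return sum;
}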
11693 /* Function supportable_narrowing_operation
11695 Check whether an operation represented by the code CODE is a
11696 narrowing operation that is supported by the target platform in
11697 vector form (i.e., when operating on arguments of type VECTYPE_IN
11698 and producing a result of type VECTYPE_OUT).
11700 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11701 and FLOAT. This function checks if these operations are supported by
11702 the target platform directly via vector tree-codes.
11704 Output:
11705 - CODE1 is the code of a vector operation to be used when
11706 vectorizing the operation, if available.
11707 - MULTI_STEP_CVT determines the number of required intermediate steps in
11708 case of multi-step conversion (like int->short->char - in that case
11709 MULTI_STEP_CVT will be 1).
11710 - INTERM_TYPES contains the intermediate type required to perform the
11711 narrowing operation (short in the above example). */
11713 bool
11714 supportable_narrowing_operation (enum tree_code code,
11715 tree vectype_out, tree vectype_in,
11716 enum tree_code *code1, int *multi_step_cvt,
11717 vec<tree> *interm_types)
11719 machine_mode vec_mode;
11720 enum insn_code icode1;
11721 optab optab1, interm_optab;
11722 tree vectype = vectype_in;
11723 tree narrow_vectype = vectype_out;
11724 enum tree_code c1;
11725 tree intermediate_type, prev_type;
11726 machine_mode intermediate_mode, prev_mode;
11727 int i;
11728 bool uns;
11730 *multi_step_cvt = 0;
11731 switch (code)
11733 CASE_CONVERT:
11734 c1 = VEC_PACK_TRUNC_EXPR;
11735 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11736 && VECTOR_BOOLEAN_TYPE_P (vectype)
11737 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11738 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11739 optab1 = vec_pack_sbool_trunc_optab;
11740 else
11741 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11742 break;
11744 case FIX_TRUNC_EXPR:
11745 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11746 /* The signedness is determined from the output operand. */
11747 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11748 break;
11750 case FLOAT_EXPR:
11751 c1 = VEC_PACK_FLOAT_EXPR;
11752 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11753 break;
11755 default:
11756 gcc_unreachable ();
11759 if (!optab1)
11760 return false;
11762 vec_mode = TYPE_MODE (vectype);
11763 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11764 return false;
11766 *code1 = c1;
11768 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11770 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11771 return true;
11772 /* For scalar masks we may have different boolean
11773 vector types having the same QImode. Thus we
11774 add an additional check for the number of elements. */
11775 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11776 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11777 return true;
11780 if (code == FLOAT_EXPR)
11781 return false;
11783 /* Check if it's a multi-step conversion that can be done using intermediate
11784 types. */
11785 prev_mode = vec_mode;
11786 prev_type = vectype;
11787 if (code == FIX_TRUNC_EXPR)
11788 uns = TYPE_UNSIGNED (vectype_out);
11789 else
11790 uns = TYPE_UNSIGNED (vectype);
11792 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11793 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11794 costly than signed. */
11795 if (code == FIX_TRUNC_EXPR && uns)
11797 enum insn_code icode2;
11799 intermediate_type
11800 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11801 interm_optab
11802 = optab_for_tree_code (c1, intermediate_type, optab_default);
11803 if (interm_optab != unknown_optab
11804 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11805 && insn_data[icode1].operand[0].mode
11806 == insn_data[icode2].operand[0].mode)
11808 uns = false;
11809 optab1 = interm_optab;
11810 icode1 = icode2;
11814 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11815 intermediate steps in the narrowing sequence. We try
11816 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11817 interm_types->create (MAX_INTERM_CVT_STEPS);
11818 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11820 intermediate_mode = insn_data[icode1].operand[0].mode;
11821 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11822 intermediate_type
11823 = vect_double_mask_nunits (prev_type, intermediate_mode);
11824 else
11825 intermediate_type
11826 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11827 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11828 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11829 && intermediate_mode == prev_mode
11830 && SCALAR_INT_MODE_P (prev_mode))
11831 interm_optab = vec_pack_sbool_trunc_optab;
11832 else
11833 interm_optab
11834 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11835 optab_default);
11836 if (!interm_optab
11837 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11838 || insn_data[icode1].operand[0].mode != intermediate_mode
11839 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11840 == CODE_FOR_nothing))
11841 break;
11843 interm_types->quick_push (intermediate_type);
11844 (*multi_step_cvt)++;
11846 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11848 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11849 return true;
11850 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11851 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11852 return true;
11855 prev_mode = intermediate_mode;
11856 prev_type = intermediate_type;
11857 optab1 = interm_optab;
11860 interm_types->release ();
11861 return false;
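/* A minimal standalone sketch (illustrative only) of the multi-step
   narrowing described above, e.g. int -> short -> char with
   MULTI_STEP_CVT == 1: truncating through the intermediate type keeps
   the same low bits as a direct truncation.  Unsigned types are used
   here so the wraparound is well defined.  */

static unsigned char
example_two_step_narrow (unsigned int x)
{
  unsigned short intermediate = (unsigned short) x;   /* first pack step */
  return (unsigned char) intermediate;                /* second pack step */
}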
11864 /* Generate and return a statement that sets vector mask MASK such that
11865 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
11867 gcall *
11868 vect_gen_while (tree mask, tree start_index, tree end_index)
11870 tree cmp_type = TREE_TYPE (start_index);
11871 tree mask_type = TREE_TYPE (mask);
11872 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11873 cmp_type, mask_type,
11874 OPTIMIZE_FOR_SPEED));
11875 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11876 start_index, end_index,
11877 build_zero_cst (mask_type));
11878 gimple_call_set_lhs (call, mask);
11879 return call;
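/* A minimal standalone sketch (illustrative only) of the WHILE_ULT
   mask semantics above: lane I of the result is true exactly when
   START_INDEX + I < END_INDEX, i.e. the active lanes form a prefix.  */

static void
example_while_ult (unsigned char *mask, unsigned int nunits,
                   unsigned int start_index, unsigned int end_index)
{
  unsigned int i;
  for (i = 0; i < nunits; i++)
    mask[i] = (start_index + i < end_index);
}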
11882 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11883 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11885 tree
11886 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11887 tree end_index)
11889 tree tmp = make_ssa_name (mask_type);
11890 gcall *call = vect_gen_while (tmp, start_index, end_index);
11891 gimple_seq_add_stmt (seq, call);
11892 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11895 /* Try to compute the vector types required to vectorize STMT_INFO,
11896 returning true on success and false if vectorization isn't possible.
11897 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11898 make sure that the number of elements in the vectors is no bigger
11899 than GROUP_SIZE.
11901 On success:
11903 - Set *STMT_VECTYPE_OUT to:
11904 - NULL_TREE if the statement doesn't need to be vectorized;
11905 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11907 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11908 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11909 statement does not help to determine the overall number of units. */
11911 opt_result
11912 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
11913 tree *stmt_vectype_out,
11914 tree *nunits_vectype_out,
11915 unsigned int group_size)
11917 gimple *stmt = stmt_info->stmt;
11919 /* For BB vectorization, we should always have a group size once we've
11920 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11921 are tentative requests during things like early data reference
11922 analysis and pattern recognition. */
11923 if (is_a <bb_vec_info> (vinfo))
11924 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11925 else
11926 group_size = 0;
11928 *stmt_vectype_out = NULL_TREE;
11929 *nunits_vectype_out = NULL_TREE;
11931 if (gimple_get_lhs (stmt) == NULL_TREE
11932 /* MASK_STORE has no lhs, but is ok. */
11933 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11935 if (is_a <gcall *> (stmt))
11937 /* Ignore calls with no lhs. These must be calls to
11938 #pragma omp simd functions, and the vectorization factor
11939 they really need can't be determined until
11940 vectorizable_simd_clone_call. */
11941 if (dump_enabled_p ())
11942 dump_printf_loc (MSG_NOTE, vect_location,
11943 "defer to SIMD clone analysis.\n");
11944 return opt_result::success ();
11947 return opt_result::failure_at (stmt,
11948 "not vectorized: irregular stmt.%G", stmt);
11951 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
11952 return opt_result::failure_at (stmt,
11953 "not vectorized: vector stmt in loop:%G",
11954 stmt);
11956 tree vectype;
11957 tree scalar_type = NULL_TREE;
11958 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
11960 vectype = STMT_VINFO_VECTYPE (stmt_info);
11961 if (dump_enabled_p ())
11962 dump_printf_loc (MSG_NOTE, vect_location,
11963 "precomputed vectype: %T\n", vectype);
11965 else if (vect_use_mask_type_p (stmt_info))
11967 unsigned int precision = stmt_info->mask_precision;
11968 scalar_type = build_nonstandard_integer_type (precision, 1);
11969 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
11970 if (!vectype)
11971 return opt_result::failure_at (stmt, "not vectorized: unsupported"
11972 " data-type %T\n", scalar_type);
11973 if (dump_enabled_p ())
11974 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
11976 else
11978 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
11979 scalar_type = TREE_TYPE (DR_REF (dr));
11980 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
11981 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
11982 else
11983 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
11985 if (dump_enabled_p ())
11987 if (group_size)
11988 dump_printf_loc (MSG_NOTE, vect_location,
11989 "get vectype for scalar type (group size %d):"
11990 " %T\n", group_size, scalar_type);
11991 else
11992 dump_printf_loc (MSG_NOTE, vect_location,
11993 "get vectype for scalar type: %T\n", scalar_type);
11995 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11996 if (!vectype)
11997 return opt_result::failure_at (stmt,
11998 "not vectorized:"
11999 " unsupported data-type %T\n",
12000 scalar_type);
12002 if (dump_enabled_p ())
12003 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12005 *stmt_vectype_out = vectype;
12007 /* Don't try to compute scalar types if the stmt produces a boolean
12008 vector; use the existing vector type instead. */
12009 tree nunits_vectype = vectype;
12010 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12012 /* The number of units is set according to the smallest scalar
12013 type (or the largest vector size, but we only support one
12014 vector size per vectorization). */
12015 HOST_WIDE_INT dummy;
12016 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12017 if (scalar_type != TREE_TYPE (vectype))
12019 if (dump_enabled_p ())
12020 dump_printf_loc (MSG_NOTE, vect_location,
12021 "get vectype for smallest scalar type: %T\n",
12022 scalar_type);
12023 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12024 group_size);
12025 if (!nunits_vectype)
12026 return opt_result::failure_at
12027 (stmt, "not vectorized: unsupported data-type %T\n",
12028 scalar_type);
12029 if (dump_enabled_p ())
12030 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12031 nunits_vectype);
12035 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12036 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12038 if (dump_enabled_p ())
12040 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12041 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12042 dump_printf (MSG_NOTE, "\n");
12045 *nunits_vectype_out = nunits_vectype;
12046 return opt_result::success ();
12049 /* Generate and return a statement sequence that sets the vector length LEN as follows:
12051 min_of_start_and_end = min (START_INDEX, END_INDEX);
12052 left_len = END_INDEX - min_of_start_and_end;
12053 rhs = min (left_len, LEN_LIMIT);
12054 LEN = rhs;
12056 Note: the cost of the code generated by this function is modeled
12057 by vect_estimate_min_profitable_iters, so changes here may need
12058 corresponding changes there. */
12060 gimple_seq
12061 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12063 gimple_seq stmts = NULL;
12064 tree len_type = TREE_TYPE (len);
12065 gcc_assert (TREE_TYPE (start_index) == len_type);
12067 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12068 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12069 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12070 gimple* stmt = gimple_build_assign (len, rhs);
12071 gimple_seq_add_stmt (&stmts, stmt);
12073 return stmts;
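/* A minimal standalone sketch (illustrative only, not used by the
   vectorizer) of the length computation built by vect_gen_len above:
   with START_INDEX = 3, END_INDEX = 10 and LEN_LIMIT = 4 it yields
   MIN (10 - MIN (3, 10), 4) = 4.  */

static unsigned int
example_gen_len (unsigned int start_index, unsigned int end_index,
                 unsigned int len_limit)
{
  unsigned int lo = start_index < end_index ? start_index : end_index;
  unsigned int left_len = end_index - lo;
  return left_len < len_limit ? left_len : len_limit;
}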