gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
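/* A minimal sketch of how a caller might use record_stmt_cost above; the
   helper name and the particular operands are hypothetical.  */

static unsigned
example_record_load_cost (stmt_vector_for_cost *cost_vec,
			  stmt_vec_info stmt_info, tree vectype, int ncopies)
{
  /* Record NCOPIES vector loads in the loop body.  record_stmt_cost itself
     reclassifies the kind as vector_gather_load if STMT_INFO is a gather.  */
  return record_stmt_cost (cost_vec, ncopies, vector_load, stmt_info,
			   vectype, 0, vect_body);
}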
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
158 tree array_ref;
159 gimple *new_stmt;
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
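/* A rough sketch of the array-of-vectors idiom served by the helpers above,
   as used for load/store-lanes; the function name and the fixed count of
   two vectors are hypothetical.  */

static tree
example_copy_through_vector_array (vec_info *vinfo, stmt_vec_info stmt_info,
				   gimple_stmt_iterator *gsi,
				   tree scalar_dest, tree vec0, tree vec1)
{
  /* Build a two-element array whose elements have the vector type of VEC0.  */
  tree array = create_vector_array (TREE_TYPE (vec0), 2);

  /* Store both vectors into the array ...  */
  write_vector_array (vinfo, stmt_info, gsi, vec0, array, 0);
  write_vector_array (vinfo, stmt_info, gsi, vec1, array, 1);

  /* ... and read the second one back as a fresh SSA name.  */
  return read_vector_array (vinfo, stmt_info, gsi, scalar_dest, array, 1);
}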
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 tree mem_ref;
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
249 worklist->safe_push (stmt_info);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
261 tree op;
262 ssa_op_iter iter;
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop-closed SSA form). */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
347 *live_p = true;
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
361 return (*live_p || *relevant);
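/* For illustration (a hypothetical scalar loop, not taken from the
   surrounding code):

	 for (i = 0; i < n; i++)
	   {
	     1. t = i * 4;	# no vdef, no use after the loop
	     2. a[t] = i;	# relevant: it has a vdef (alters memory)
	     3. s = s + i;	# live: s is used after the loop
	   }
	 ... = s;

   vect_stmt_relevant_p marks stmt 2 as relevant and stmt 3 as live (and,
   since it is not invariant, also vect_used_only_live); stmt 1 is neither.  */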
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
373 tree operand;
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
381 /* STMT has a data_ref. FORNOW this means that it's of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
413 return false;
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
422 if (operand == use)
423 return true;
425 return false;
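/* For illustration (a hypothetical scalar statement, not taken from the
   surrounding code): in a[i_1] = x_2, the use of i_1 only computes the
   address and so is "indexing", whereas x_2 is the stored value; only for
   the use of x_2 does the function above return true.  */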
430 Function process_use.
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
474 if (!dstmt_vinfo)
475 return opt_result::success ();
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
654 use_operand_p use_p;
655 ssa_op_iter iter;
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
706 default:
707 break;
710 if (is_pattern_stmt_p (stmt_vinfo))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
734 for (; i < gimple_num_ops (assign); i++)
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
749 for (i = 0; i < gimple_call_num_args (call); i++)
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
781 if (fatal)
782 *fatal = false;
783 return res;
786 } /* while worklist */
788 return opt_result::success ();
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
806 int inside_cost = 0, prologue_cost = 0;
808 gcc_assert (cost_vec != NULL);
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. If WIDEN_ARITH
840 is true the stmt is doing widening arithmetic. */
842 static void
843 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
844 enum vect_def_type *dt,
845 unsigned int ncopies, int pwr,
846 stmt_vector_for_cost *cost_vec,
847 bool widen_arith)
849 int i;
850 int inside_cost = 0, prologue_cost = 0;
852 for (i = 0; i < pwr + 1; i++)
854 inside_cost += record_stmt_cost (cost_vec, ncopies,
855 widen_arith
856 ? vector_stmt : vec_promote_demote,
857 stmt_info, 0, vect_body);
858 ncopies *= 2;
861 /* FORNOW: Assuming maximum 2 args per stmt. */
862 for (i = 0; i < 2; i++)
863 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
864 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
865 stmt_info, 0, vect_prologue);
867 if (dump_enabled_p ())
868 dump_printf_loc (MSG_NOTE, vect_location,
869 "vect_model_promotion_demotion_cost: inside_cost = %d, "
870 "prologue_cost = %d .\n", inside_cost, prologue_cost);
873 /* Returns true if the current function returns DECL. */
875 static bool
876 cfun_returns (tree decl)
878 edge_iterator ei;
879 edge e;
880 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
882 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
883 if (!ret)
884 continue;
885 if (gimple_return_retval (ret) == decl)
886 return true;
887 /* We often end up with an aggregate copy to the result decl;
888 handle that case as well, but first skip intermediate
889 clobbers. */
890 gimple *def = ret;
893 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
895 while (gimple_clobber_p (def));
896 if (is_a <gassign *> (def)
897 && gimple_assign_lhs (def) == gimple_return_retval (ret)
898 && gimple_assign_rhs1 (def) == decl)
899 return true;
901 return false;
904 /* Function vect_model_store_cost
906 Models cost for stores. In the case of grouped accesses, one access
907 has the overhead of the grouped access attributed to it. */
909 static void
910 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
911 vect_memory_access_type memory_access_type,
912 dr_alignment_support alignment_support_scheme,
913 int misalignment,
914 vec_load_store_type vls_type, slp_tree slp_node,
915 stmt_vector_for_cost *cost_vec)
917 unsigned int inside_cost = 0, prologue_cost = 0;
918 stmt_vec_info first_stmt_info = stmt_info;
919 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
921 /* ??? Somehow we need to fix this at the callers. */
922 if (slp_node)
923 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
925 if (vls_type == VLS_STORE_INVARIANT)
927 if (!slp_node)
928 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
929 stmt_info, 0, vect_prologue);
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node && grouped_access_p)
935 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p = (first_stmt_info == stmt_info);
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
946 if (first_stmt_p
947 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
949 /* Uses high and low interleave or shuffle operations for each
950 needed permute. */
951 int group_size = DR_GROUP_SIZE (first_stmt_info);
952 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
953 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
954 stmt_info, 0, vect_body);
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE, vect_location,
958 "vect_model_store_cost: strided group_size = %d .\n",
959 group_size);
962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
963 /* Costs of the stores. */
964 if (memory_access_type == VMAT_ELEMENTWISE
965 || memory_access_type == VMAT_GATHER_SCATTER)
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
969 inside_cost += record_stmt_cost (cost_vec,
970 ncopies * assumed_nunits,
971 scalar_store, stmt_info, 0, vect_body);
973 else
974 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
975 misalignment, &inside_cost, cost_vec);
977 if (memory_access_type == VMAT_ELEMENTWISE
978 || memory_access_type == VMAT_STRIDED_SLP)
980 /* N scalar stores plus extracting the elements. */
981 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
982 inside_cost += record_stmt_cost (cost_vec,
983 ncopies * assumed_nunits,
984 vec_to_scalar, stmt_info, 0, vect_body);
987 /* When vectorizing a store into the function result assign
988 a penalty if the function returns in a multi-register location.
989 In this case we assume we'll end up with having to spill the
990 vector result and do piecewise loads as a conservative estimate. */
991 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
992 if (base
993 && (TREE_CODE (base) == RESULT_DECL
994 || (DECL_P (base) && cfun_returns (base)))
995 && !aggregate_value_p (base, cfun->decl))
997 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
998 /* ??? Handle PARALLEL in some way. */
999 if (REG_P (reg))
1001 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1002 /* Assume that a single reg-reg move is possible and cheap,
1003 do not account for vector to gp register move cost. */
1004 if (nregs > 1)
1006 /* Spill. */
1007 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1008 vector_store,
1009 stmt_info, 0, vect_epilogue);
1010 /* Loads. */
1011 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1012 scalar_load,
1013 stmt_info, 0, vect_epilogue);
1018 if (dump_enabled_p ())
1019 dump_printf_loc (MSG_NOTE, vect_location,
1020 "vect_model_store_cost: inside_cost = %d, "
1021 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1025 /* Calculate cost of DR's memory access. */
1026 void
1027 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1028 dr_alignment_support alignment_support_scheme,
1029 int misalignment,
1030 unsigned int *inside_cost,
1031 stmt_vector_for_cost *body_cost_vec)
1033 switch (alignment_support_scheme)
1035 case dr_aligned:
1037 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1038 vector_store, stmt_info, 0,
1039 vect_body);
1041 if (dump_enabled_p ())
1042 dump_printf_loc (MSG_NOTE, vect_location,
1043 "vect_model_store_cost: aligned.\n");
1044 break;
1047 case dr_unaligned_supported:
1049 /* Here, we assign an additional cost for the unaligned store. */
1050 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1051 unaligned_store, stmt_info,
1052 misalignment, vect_body);
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE, vect_location,
1055 "vect_model_store_cost: unaligned supported by "
1056 "hardware.\n");
1057 break;
1060 case dr_unaligned_unsupported:
1062 *inside_cost = VECT_MAX_COST;
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1066 "vect_model_store_cost: unsupported access.\n");
1067 break;
1070 default:
1071 gcc_unreachable ();
1076 /* Function vect_model_load_cost
1078 Models cost for loads. In the case of grouped accesses, one access has
1079 the overhead of the grouped access attributed to it. Since unaligned
1080 accesses are supported for loads, we also account for the costs of the
1081 access scheme chosen. */
1083 static void
1084 vect_model_load_cost (vec_info *vinfo,
1085 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1086 vect_memory_access_type memory_access_type,
1087 dr_alignment_support alignment_support_scheme,
1088 int misalignment,
1089 gather_scatter_info *gs_info,
1090 slp_tree slp_node,
1091 stmt_vector_for_cost *cost_vec)
1093 unsigned int inside_cost = 0, prologue_cost = 0;
1094 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1096 gcc_assert (cost_vec);
1098 /* ??? Somehow we need to fix this at the callers. */
1099 if (slp_node)
1100 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1102 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1104 /* If the load is permuted then the alignment is determined by
1105 the first group element, not by the first scalar stmt DR. */
1106 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1107 /* Record the cost for the permutation. */
1108 unsigned n_perms, n_loads;
1109 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1110 vf, true, &n_perms, &n_loads);
1111 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1112 first_stmt_info, 0, vect_body);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1116 ncopies = n_loads;
1119 /* Grouped loads read all elements in the group at once,
1120 so we want the DR for the first statement. */
1121 stmt_vec_info first_stmt_info = stmt_info;
1122 if (!slp_node && grouped_access_p)
1123 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1125 /* True if we should include any once-per-group costs as well as
1126 the cost of the statement itself. For SLP we only get called
1127 once per group anyhow. */
1128 bool first_stmt_p = (first_stmt_info == stmt_info);
1130 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1131 ones we actually need. Account for the cost of unused results. */
1132 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1134 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1135 stmt_vec_info next_stmt_info = first_stmt_info;
1138 gaps -= 1;
1139 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1141 while (next_stmt_info);
1142 if (gaps)
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE, vect_location,
1146 "vect_model_load_cost: %d unused vectors.\n",
1147 gaps);
1148 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1149 alignment_support_scheme, misalignment, false,
1150 &inside_cost, &prologue_cost,
1151 cost_vec, cost_vec, true);
1155 /* We assume that the cost of a single load-lanes instruction is
1156 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1157 access is instead being provided by a load-and-permute operation,
1158 include the cost of the permutes. */
1159 if (first_stmt_p
1160 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1162 /* Uses even and odd extract operations or shuffle operations
1163 for each needed permute. */
1164 int group_size = DR_GROUP_SIZE (first_stmt_info);
1165 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1166 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1167 stmt_info, 0, vect_body);
1169 if (dump_enabled_p ())
1170 dump_printf_loc (MSG_NOTE, vect_location,
1171 "vect_model_load_cost: strided group_size = %d .\n",
1172 group_size);
1175 /* The loads themselves. */
1176 if (memory_access_type == VMAT_ELEMENTWISE
1177 || memory_access_type == VMAT_GATHER_SCATTER)
1179 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1180 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1181 if (memory_access_type == VMAT_GATHER_SCATTER
1182 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1183 /* For emulated gathers N offset vector element extracts
1184 (we assume the scalar scaling and ptr + offset add is consumed by
1185 the load). */
1186 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1187 vec_to_scalar, stmt_info, 0,
1188 vect_body);
1189 /* N scalar loads plus gathering them into a vector. */
1190 inside_cost += record_stmt_cost (cost_vec,
1191 ncopies * assumed_nunits,
1192 scalar_load, stmt_info, 0, vect_body);
1194 else if (memory_access_type == VMAT_INVARIANT)
1196 /* Invariant loads will ideally be hoisted and splat to a vector. */
1197 prologue_cost += record_stmt_cost (cost_vec, 1,
1198 scalar_load, stmt_info, 0,
1199 vect_prologue);
1200 prologue_cost += record_stmt_cost (cost_vec, 1,
1201 scalar_to_vec, stmt_info, 0,
1202 vect_prologue);
1204 else
1205 vect_get_load_cost (vinfo, stmt_info, ncopies,
1206 alignment_support_scheme, misalignment, first_stmt_p,
1207 &inside_cost, &prologue_cost,
1208 cost_vec, cost_vec, true);
1209 if (memory_access_type == VMAT_ELEMENTWISE
1210 || memory_access_type == VMAT_STRIDED_SLP
1211 || (memory_access_type == VMAT_GATHER_SCATTER
1212 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1213 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1214 stmt_info, 0, vect_body);
1216 if (dump_enabled_p ())
1217 dump_printf_loc (MSG_NOTE, vect_location,
1218 "vect_model_load_cost: inside_cost = %d, "
1219 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1223 /* Calculate cost of DR's memory access. */
1224 void
1225 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1226 dr_alignment_support alignment_support_scheme,
1227 int misalignment,
1228 bool add_realign_cost, unsigned int *inside_cost,
1229 unsigned int *prologue_cost,
1230 stmt_vector_for_cost *prologue_cost_vec,
1231 stmt_vector_for_cost *body_cost_vec,
1232 bool record_prologue_costs)
1234 switch (alignment_support_scheme)
1236 case dr_aligned:
1238 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1239 stmt_info, 0, vect_body);
1241 if (dump_enabled_p ())
1242 dump_printf_loc (MSG_NOTE, vect_location,
1243 "vect_model_load_cost: aligned.\n");
1245 break;
1247 case dr_unaligned_supported:
1249 /* Here, we assign an additional cost for the unaligned load. */
1250 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1251 unaligned_load, stmt_info,
1252 misalignment, vect_body);
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE, vect_location,
1256 "vect_model_load_cost: unaligned supported by "
1257 "hardware.\n");
1259 break;
1261 case dr_explicit_realign:
1263 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1264 vector_load, stmt_info, 0, vect_body);
1265 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1266 vec_perm, stmt_info, 0, vect_body);
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1270 prologue costs. */
1271 if (targetm.vectorize.builtin_mask_for_load)
1272 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1273 stmt_info, 0, vect_body);
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE, vect_location,
1277 "vect_model_load_cost: explicit realign\n");
1279 break;
1281 case dr_explicit_realign_optimized:
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "vect_model_load_cost: unaligned software "
1286 "pipelined.\n");
1288 /* Unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1295 if (add_realign_cost && record_prologue_costs)
1297 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1298 vector_stmt, stmt_info,
1299 0, vect_prologue);
1300 if (targetm.vectorize.builtin_mask_for_load)
1301 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1302 vector_stmt, stmt_info,
1303 0, vect_prologue);
1306 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1307 stmt_info, 0, vect_body);
1308 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1309 stmt_info, 0, vect_body);
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE, vect_location,
1313 "vect_model_load_cost: explicit realign optimized"
1314 "\n");
1316 break;
1319 case dr_unaligned_unsupported:
1321 *inside_cost = VECT_MAX_COST;
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1325 "vect_model_load_cost: unsupported access.\n");
1326 break;
1329 default:
1330 gcc_unreachable ();
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
1337 static void
1338 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1339 gimple_stmt_iterator *gsi)
1341 if (gsi)
1342 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1343 else
1344 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1346 if (dump_enabled_p ())
1347 dump_printf_loc (MSG_NOTE, vect_location,
1348 "created new init_stmt: %G", new_stmt);
1351 /* Function vect_init_vector.
1353 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1354 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1355 vector type, a vector with all elements equal to VAL is created first.
1356 Place the initialization at GSI if it is not NULL. Otherwise, place the
1357 initialization at the loop preheader.
1358 Return the DEF of INIT_STMT.
1359 It will be used in the vectorization of STMT_INFO. */
1361 tree
1362 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1363 gimple_stmt_iterator *gsi)
1365 gimple *init_stmt;
1366 tree new_temp;
1368 /* We abuse this function to push something to an SSA name with initial 'val'. */
1369 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1371 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1372 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1374 /* A scalar boolean value should be transformed into an all-zeros
1375 or all-ones value before building a vector. */
1376 if (VECTOR_BOOLEAN_TYPE_P (type))
1378 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1379 tree false_val = build_zero_cst (TREE_TYPE (type));
1381 if (CONSTANT_CLASS_P (val))
1382 val = integer_zerop (val) ? false_val : true_val;
1383 else
1385 new_temp = make_ssa_name (TREE_TYPE (type));
1386 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1387 val, true_val, false_val);
1388 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1389 val = new_temp;
1392 else
1394 gimple_seq stmts = NULL;
1395 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1396 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1397 TREE_TYPE (type), val);
1398 else
1399 /* ??? Condition vectorization expects us to do
1400 promotion of invariant/external defs. */
1401 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1402 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1403 !gsi_end_p (gsi2); )
1405 init_stmt = gsi_stmt (gsi2);
1406 gsi_remove (&gsi2, false);
1407 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1411 val = build_vector_from_val (type, val);
1414 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1415 init_stmt = gimple_build_assign (new_temp, val);
1416 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1417 return new_temp;
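/* A minimal sketch of using vect_init_vector above to splat an integer
   constant, assuming VECTYPE is an integral vector type; the helper name
   and the constant are hypothetical.  */

static tree
example_splat_constant (vec_info *vinfo, stmt_vec_info stmt_info,
			tree vectype)
{
  tree val = build_int_cst (TREE_TYPE (vectype), 7);
  /* Passing a NULL GSI places the initialization at the loop preheader
     (or region entry for basic-block vectorization).  */
  return vect_init_vector (vinfo, stmt_info, val, vectype, NULL);
}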
1421 /* Function vect_get_vec_defs_for_operand.
1423 OP is an operand in STMT_VINFO. This function returns a vector of
1424 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1426 In the case that OP is an SSA_NAME which is defined in the loop, then
1427 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1429 In case OP is an invariant or constant, a new stmt that creates a vector def
1430 needs to be introduced. VECTYPE may be used to specify a required type for
1431 vector invariant. */
1433 void
1434 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1435 unsigned ncopies,
1436 tree op, vec<tree> *vec_oprnds, tree vectype)
1438 gimple *def_stmt;
1439 enum vect_def_type dt;
1440 bool is_simple_use;
1441 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1443 if (dump_enabled_p ())
1444 dump_printf_loc (MSG_NOTE, vect_location,
1445 "vect_get_vec_defs_for_operand: %T\n", op);
1447 stmt_vec_info def_stmt_info;
1448 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1449 &def_stmt_info, &def_stmt);
1450 gcc_assert (is_simple_use);
1451 if (def_stmt && dump_enabled_p ())
1452 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1454 vec_oprnds->create (ncopies);
1455 if (dt == vect_constant_def || dt == vect_external_def)
1457 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1458 tree vector_type;
1460 if (vectype)
1461 vector_type = vectype;
1462 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1463 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1464 vector_type = truth_type_for (stmt_vectype);
1465 else
1466 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1468 gcc_assert (vector_type);
1469 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1470 while (ncopies--)
1471 vec_oprnds->quick_push (vop);
1473 else
1475 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1476 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1477 for (unsigned i = 0; i < ncopies; ++i)
1478 vec_oprnds->quick_push (gimple_get_lhs
1479 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1484 /* Get vectorized definitions for the operands OP0 to OP3 that are non-NULL. */
1486 void
1487 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1488 unsigned ncopies,
1489 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1490 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1491 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1492 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1494 if (slp_node)
1496 if (op0)
1497 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1498 if (op1)
1499 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1500 if (op2)
1501 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1502 if (op3)
1503 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1505 else
1507 if (op0)
1508 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1509 op0, vec_oprnds0, vectype0);
1510 if (op1)
1511 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1512 op1, vec_oprnds1, vectype1);
1513 if (op2)
1514 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1515 op2, vec_oprnds2, vectype2);
1516 if (op3)
1517 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1518 op3, vec_oprnds3, vectype3);
1522 void
1523 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1524 unsigned ncopies,
1525 tree op0, vec<tree> *vec_oprnds0,
1526 tree op1, vec<tree> *vec_oprnds1,
1527 tree op2, vec<tree> *vec_oprnds2,
1528 tree op3, vec<tree> *vec_oprnds3)
1530 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1531 op0, vec_oprnds0, NULL_TREE,
1532 op1, vec_oprnds1, NULL_TREE,
1533 op2, vec_oprnds2, NULL_TREE,
1534 op3, vec_oprnds3, NULL_TREE);
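/* A sketch of the calling convention (hypothetical wrapper): fetch the
   vectorized defs for just the two operands of a binary statement; unused
   operand slots are passed as NULL_TREE / NULL and are ignored by the
   functions above.  */

static void
example_get_binary_op_defs (vec_info *vinfo, stmt_vec_info stmt_info,
			    slp_tree slp_node, unsigned ncopies,
			    tree op0, vec<tree> *vec_oprnds0,
			    tree op1, vec<tree> *vec_oprnds1)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, vec_oprnds0,
		     op1, vec_oprnds1,
		     NULL_TREE, NULL,
		     NULL_TREE, NULL);
}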
1537 /* Helper function called by vect_finish_replace_stmt and
1538 vect_finish_stmt_generation. Set the location of the new
1539 statement and create and return a stmt_vec_info for it. */
1541 static void
1542 vect_finish_stmt_generation_1 (vec_info *,
1543 stmt_vec_info stmt_info, gimple *vec_stmt)
1545 if (dump_enabled_p ())
1546 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1548 if (stmt_info)
1550 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1552 /* While EH edges will generally prevent vectorization, stmt might
1553 e.g. be in a must-not-throw region. Ensure newly created stmts
1554 that could throw are part of the same region. */
1555 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1556 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1557 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1559 else
1560 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1563 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1564 which sets the same scalar result as STMT_INFO did. Create and return a
1565 stmt_vec_info for VEC_STMT. */
1567 void
1568 vect_finish_replace_stmt (vec_info *vinfo,
1569 stmt_vec_info stmt_info, gimple *vec_stmt)
1571 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1572 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1574 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1575 gsi_replace (&gsi, vec_stmt, true);
1577 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1580 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1581 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1583 void
1584 vect_finish_stmt_generation (vec_info *vinfo,
1585 stmt_vec_info stmt_info, gimple *vec_stmt,
1586 gimple_stmt_iterator *gsi)
1588 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1590 if (!gsi_end_p (*gsi)
1591 && gimple_has_mem_ops (vec_stmt))
1593 gimple *at_stmt = gsi_stmt (*gsi);
1594 tree vuse = gimple_vuse (at_stmt);
1595 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1597 tree vdef = gimple_vdef (at_stmt);
1598 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1599 gimple_set_modified (vec_stmt, true);
1600 /* If we have an SSA vuse and insert a store, update virtual
1601 SSA form to avoid triggering the renamer. Do so only
1602 if we can easily see all uses - which is what almost always
1603 happens with the way vectorized stmts are inserted. */
1604 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1605 && ((is_gimple_assign (vec_stmt)
1606 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1607 || (is_gimple_call (vec_stmt)
1608 && !(gimple_call_flags (vec_stmt)
1609 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1611 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1612 gimple_set_vdef (vec_stmt, new_vdef);
1613 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1617 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1618 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
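/* A minimal sketch (hypothetical helper): build a vector addition and
   insert it before *GSI as part of vectorizing STMT_INFO, following the
   same pattern as read_vector_array above.  */

static tree
example_emit_vector_add (vec_info *vinfo, stmt_vec_info stmt_info,
			 gimple_stmt_iterator *gsi, tree vec_dest,
			 tree a, tree b)
{
  gassign *new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, a, b);
  tree lhs = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, lhs);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
  return lhs;
}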
1621 /* We want to vectorize a call to combined function CFN with function
1622 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1623 as the types of all inputs. Check whether this is possible using
1624 an internal function, returning its code if so or IFN_LAST if not. */
1626 static internal_fn
1627 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1628 tree vectype_out, tree vectype_in)
1630 internal_fn ifn;
1631 if (internal_fn_p (cfn))
1632 ifn = as_internal_fn (cfn);
1633 else
1634 ifn = associated_internal_fn (fndecl);
1635 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1637 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1638 if (info.vectorizable)
1640 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1641 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1642 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1643 OPTIMIZE_FOR_SPEED))
1644 return ifn;
1647 return IFN_LAST;
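/* For example, a scalar call to sqrtf has combined function
   CFN_BUILT_IN_SQRTF; if the target implements sqrt on the chosen vector
   mode, the function above returns IFN_SQRT and the call is vectorized as
   an internal-function call, otherwise it returns IFN_LAST.  */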
1651 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1652 gimple_stmt_iterator *);
1654 /* Check whether a load or store statement in the loop described by
1655 LOOP_VINFO is possible in a loop using partial vectors. This is
1656 testing whether the vectorizer pass has the appropriate support,
1657 as well as whether the target does.
1659 VLS_TYPE says whether the statement is a load or store and VECTYPE
1660 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1661 says how the load or store is going to be implemented and GROUP_SIZE
1662 is the number of load or store statements in the containing group.
1663 If the access is a gather load or scatter store, GS_INFO describes
1664 its arguments. If the load or store is conditional, SCALAR_MASK is the
1665 condition under which it occurs.
1667 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1668 vectors is not supported, otherwise record the required rgroup control
1669 types. */
1671 static void
1672 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1673 vec_load_store_type vls_type,
1674 int group_size,
1675 vect_memory_access_type
1676 memory_access_type,
1677 gather_scatter_info *gs_info,
1678 tree scalar_mask)
1680 /* Invariant loads need no special support. */
1681 if (memory_access_type == VMAT_INVARIANT)
1682 return;
1684 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1685 machine_mode vecmode = TYPE_MODE (vectype);
1686 bool is_load = (vls_type == VLS_LOAD);
1687 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1689 if (is_load
1690 ? !vect_load_lanes_supported (vectype, group_size, true)
1691 : !vect_store_lanes_supported (vectype, group_size, true))
1693 if (dump_enabled_p ())
1694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1695 "can't operate on partial vectors because"
1696 " the target doesn't have an appropriate"
1697 " load/store-lanes instruction.\n");
1698 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1699 return;
1701 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1702 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1703 return;
1706 if (memory_access_type == VMAT_GATHER_SCATTER)
1708 internal_fn ifn = (is_load
1709 ? IFN_MASK_GATHER_LOAD
1710 : IFN_MASK_SCATTER_STORE);
1711 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1712 gs_info->memory_type,
1713 gs_info->offset_vectype,
1714 gs_info->scale))
1716 if (dump_enabled_p ())
1717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1718 "can't operate on partial vectors because"
1719 " the target doesn't have an appropriate"
1720 " gather load or scatter store instruction.\n");
1721 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1722 return;
1724 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1725 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1726 return;
1729 if (memory_access_type != VMAT_CONTIGUOUS
1730 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1732 /* Element X of the data must come from iteration i * VF + X of the
1733 scalar loop. We need more work to support other mappings. */
1734 if (dump_enabled_p ())
1735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1736 "can't operate on partial vectors because an"
1737 " access isn't contiguous.\n");
1738 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1739 return;
1742 if (!VECTOR_MODE_P (vecmode))
1744 if (dump_enabled_p ())
1745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1746 "can't operate on partial vectors when emulating"
1747 " vector operations.\n");
1748 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1749 return;
1752 /* We might load more scalars than we need for permuting SLP loads.
1753 We checked in get_group_load_store_type that the extra elements
1754 don't leak into a new vector. */
1755 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1757 unsigned int nvectors;
1758 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1759 return nvectors;
1760 gcc_unreachable ();
1763 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1764 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1765 machine_mode mask_mode;
1766 bool using_partial_vectors_p = false;
1767 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1768 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1770 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1771 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1772 using_partial_vectors_p = true;
1775 machine_mode vmode;
1776 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1778 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1779 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1780 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1781 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1782 using_partial_vectors_p = true;
1785 if (!using_partial_vectors_p)
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1789 "can't operate on partial vectors because the"
1790 " target doesn't have the appropriate partial"
1791 " vectorization load or store.\n");
1792 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
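/* For illustration (numbers are made up, not from any particular target):
   with VECTYPE = V8HI (nunits 8), GROUP_SIZE = 3 and a vectorization
   factor of 4, get_valid_nvectors (3 * 4, 8) rounds 12 / 8 away from
   zero, so 2 rgroup controls are recorded.  For the length-based path,
   FACTOR stays 1 when the len load/store mode equals VECMODE; when the
   target instead measures lengths in bytes (VMODE != VECMODE), FACTOR
   becomes GET_MODE_UNIT_SIZE (VECMODE), i.e. 2 for HImode elements.  */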
1796 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1797 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1798 that needs to be applied to all loads and stores in a vectorized loop.
1799 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1801 MASK_TYPE is the type of both masks. If new statements are needed,
1802 insert them before GSI. */
1804 static tree
1805 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1806 gimple_stmt_iterator *gsi)
1808 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1809 if (!loop_mask)
1810 return vec_mask;
1812 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1813 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1814 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1815 vec_mask, loop_mask);
1816 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1817 return and_res;
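/* A minimal sketch of what the function emits when LOOP_MASK is nonnull
   (the SSA names are hypothetical):

     vec_mask_and_4 = vec_mask_2 & loop_mask_3;

   and vec_mask_and_4 is the value the caller then feeds to the masked
   load or store.  */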
1820 /* Determine whether we can use a gather load or scatter store to vectorize
1821 strided load or store STMT_INFO by truncating the current offset to a
1822 smaller width. We need to be able to construct an offset vector:
1824 { 0, X, X*2, X*3, ... }
1826 without loss of precision, where X is STMT_INFO's DR_STEP.
1828 Return true if this is possible, describing the gather load or scatter
1829 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1831 static bool
1832 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1833 loop_vec_info loop_vinfo, bool masked_p,
1834 gather_scatter_info *gs_info)
1836 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1837 data_reference *dr = dr_info->dr;
1838 tree step = DR_STEP (dr);
1839 if (TREE_CODE (step) != INTEGER_CST)
1841 /* ??? Perhaps we could use range information here? */
1842 if (dump_enabled_p ())
1843 dump_printf_loc (MSG_NOTE, vect_location,
1844 "cannot truncate variable step.\n");
1845 return false;
1848 /* Get the number of bits in an element. */
1849 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1850 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1851 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1853 /* Set COUNT to the upper limit on the number of elements - 1.
1854 Start with the maximum vectorization factor. */
1855 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1857 /* Try lowering COUNT to the number of scalar latch iterations. */
1858 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1859 widest_int max_iters;
1860 if (max_loop_iterations (loop, &max_iters)
1861 && max_iters < count)
1862 count = max_iters.to_shwi ();
1864 /* Try scales of 1 and the element size. */
1865 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1866 wi::overflow_type overflow = wi::OVF_NONE;
1867 for (int i = 0; i < 2; ++i)
1869 int scale = scales[i];
1870 widest_int factor;
1871 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1872 continue;
1874 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1875 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1876 if (overflow)
1877 continue;
1878 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1879 unsigned int min_offset_bits = wi::min_precision (range, sign);
1881 /* Find the narrowest viable offset type. */
1882 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1883 tree offset_type = build_nonstandard_integer_type (offset_bits,
1884 sign == UNSIGNED);
1886 /* See whether the target supports the operation with an offset
1887 no narrower than OFFSET_TYPE. */
1888 tree memory_type = TREE_TYPE (DR_REF (dr));
1889 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1890 vectype, memory_type, offset_type, scale,
1891 &gs_info->ifn, &gs_info->offset_vectype)
1892 || gs_info->ifn == IFN_LAST)
1893 continue;
1895 gs_info->decl = NULL_TREE;
1896 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1897 but we don't need to store that here. */
1898 gs_info->base = NULL_TREE;
1899 gs_info->element_type = TREE_TYPE (vectype);
1900 gs_info->offset = fold_convert (offset_type, step);
1901 gs_info->offset_dt = vect_constant_def;
1902 gs_info->scale = scale;
1903 gs_info->memory_type = memory_type;
1904 return true;
1907 if (overflow && dump_enabled_p ())
1908 dump_printf_loc (MSG_NOTE, vect_location,
1909 "truncating gather/scatter offset to %d bits"
1910 " might change its value.\n", element_bits);
1912 return false;
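/* A worked example with made-up numbers: if DR_STEP is 12, the scalar
   element size is 4 and COUNT has been lowered to 255, then taking the
   SCALE = 4 iteration the factor is 3 and RANGE = 255 * 3 = 765, which
   needs 10 bits as an unsigned value; OFFSET_BITS therefore rounds up
   to 16 and we ask whether the target supports the gather or scatter
   with an offset type of at least 16 unsigned bits at scale 4.  */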
1915 /* Return true if we can use gather/scatter internal functions to
1916 vectorize STMT_INFO, which is a grouped or strided load or store.
1917    MASKED_P is true if the load or store is conditional.  When returning
1918 true, fill in GS_INFO with the information required to perform the
1919 operation. */
1921 static bool
1922 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1923 loop_vec_info loop_vinfo, bool masked_p,
1924 gather_scatter_info *gs_info)
1926 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1927 || gs_info->ifn == IFN_LAST)
1928 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1929 masked_p, gs_info);
1931 tree old_offset_type = TREE_TYPE (gs_info->offset);
1932 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1934 gcc_assert (TYPE_PRECISION (new_offset_type)
1935 >= TYPE_PRECISION (old_offset_type));
1936 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1938 if (dump_enabled_p ())
1939 dump_printf_loc (MSG_NOTE, vect_location,
1940 "using gather/scatter for strided/grouped access,"
1941 " scale = %d\n", gs_info->scale);
1943 return true;
1946 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1947 elements with a known constant step. Return -1 if that step
1948 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1950 static int
1951 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1953 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1954 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1955 size_zero_node);
1958 /* If the target supports a permute mask that reverses the elements in
1959 a vector of type VECTYPE, return that mask, otherwise return null. */
1961 static tree
1962 perm_mask_for_reverse (tree vectype)
1964 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1966 /* The encoding has a single stepped pattern. */
1967 vec_perm_builder sel (nunits, 1, 3);
1968 for (int i = 0; i < 3; ++i)
1969 sel.quick_push (nunits - 1 - i);
1971 vec_perm_indices indices (sel, 1, nunits);
1972 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1973 return NULL_TREE;
1974 return vect_gen_perm_mask_checked (vectype, indices);
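/* For illustration: with NUNITS = 8 the three encoded elements pushed
   above are { 7, 6, 5 }; as a single stepped pattern they expand to the
   full reversal selector { 7, 6, 5, 4, 3, 2, 1, 0 }, and the same
   encoding extends naturally to variable-length vectors.  */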
1977 /* A subroutine of get_load_store_type, with a subset of the same
1978 arguments. Handle the case where STMT_INFO is a load or store that
1979 accesses consecutive elements with a negative step. Sets *POFFSET
1980 to the offset to be applied to the DR for the first access. */
1982 static vect_memory_access_type
1983 get_negative_load_store_type (vec_info *vinfo,
1984 stmt_vec_info stmt_info, tree vectype,
1985 vec_load_store_type vls_type,
1986 unsigned int ncopies, poly_int64 *poffset)
1988 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1989 dr_alignment_support alignment_support_scheme;
1991 if (ncopies > 1)
1993 if (dump_enabled_p ())
1994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1995 "multiple types with negative step.\n");
1996 return VMAT_ELEMENTWISE;
1999 int misalignment = dr_misalignment (dr_info, vectype);
2000 alignment_support_scheme
2001 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2002 if (alignment_support_scheme != dr_aligned
2003 && alignment_support_scheme != dr_unaligned_supported)
2005 if (dump_enabled_p ())
2006 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2007 "negative step but alignment required.\n");
2008 return VMAT_ELEMENTWISE;
2011 if (vls_type == VLS_STORE_INVARIANT)
2013 if (dump_enabled_p ())
2014 dump_printf_loc (MSG_NOTE, vect_location,
2015 "negative step with invariant source;"
2016 " no permute needed.\n");
2017 *poffset = -TYPE_VECTOR_SUBPARTS (vectype) + 1;
2018 return VMAT_CONTIGUOUS_DOWN;
2021 if (!perm_mask_for_reverse (vectype))
2023 if (dump_enabled_p ())
2024 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2025 "negative step and reversing not supported.\n");
2026 return VMAT_ELEMENTWISE;
2029 *poffset = -TYPE_VECTOR_SUBPARTS (vectype) + 1;
2030 return VMAT_CONTIGUOUS_REVERSE;
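/* For illustration: with VECTYPE = V4SI and a scalar step of -4 bytes,
   *POFFSET becomes -3 elements, so the first vector access covers the
   four ints that end at the original DR address.  For
   VMAT_CONTIGUOUS_REVERSE the loaded vector is then reversed to match
   the scalar order, whereas VMAT_CONTIGUOUS_DOWN (invariant stores)
   needs no permute because every element is the same.  */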
2033 /* STMT_INFO is either a masked or unconditional store. Return the value
2034 being stored. */
2036 tree
2037 vect_get_store_rhs (stmt_vec_info stmt_info)
2039 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2041 gcc_assert (gimple_assign_single_p (assign));
2042 return gimple_assign_rhs1 (assign);
2044 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2046 internal_fn ifn = gimple_call_internal_fn (call);
2047 int index = internal_fn_stored_value_index (ifn);
2048 gcc_assert (index >= 0);
2049 return gimple_call_arg (call, index);
2051 gcc_unreachable ();
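/* For example, for a plain assignment the stored value is the single
   RHS, while for an internal call such as IFN_MASK_STORE
   (base, align, mask, value) internal_fn_stored_value_index selects the
   VALUE argument (index 3 in that case).  */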
2054 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2056    This function returns a vector type which can be composed with NELTS pieces,
2057    whose type is recorded in PTYPE.  VTYPE should be a vector type and have the
2058    same vector size as the return vector.  It first checks whether the target
2059    supports a pieces-size vector mode for the construction; if not, it then
2060    tries a pieces-size scalar mode.  It returns NULL_TREE if no available
2061    composition can be found.
2063 For example, for (vtype=V16QI, nelts=4), we can probably get:
2064 - V16QI with PTYPE V4QI.
2065 - V4SI with PTYPE SI.
2066 - NULL_TREE. */
2068 static tree
2069 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2071 gcc_assert (VECTOR_TYPE_P (vtype));
2072 gcc_assert (known_gt (nelts, 0U));
2074 machine_mode vmode = TYPE_MODE (vtype);
2075 if (!VECTOR_MODE_P (vmode))
2076 return NULL_TREE;
2078 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2079 unsigned int pbsize;
2080 if (constant_multiple_p (vbsize, nelts, &pbsize))
2082 /* First check if vec_init optab supports construction from
2083 vector pieces directly. */
2084 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2085 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2086 machine_mode rmode;
2087 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2088 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2089 != CODE_FOR_nothing))
2091 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2092 return vtype;
2095      /* Otherwise check whether an integer type of the same piece size exists
2096	 and whether the vec_init optab supports construction from it directly.  */
2097 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2098 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2099 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2100 != CODE_FOR_nothing))
2102 *ptype = build_nonstandard_integer_type (pbsize, 1);
2103 return build_vector_type (*ptype, nelts);
2107 return NULL_TREE;
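/* Working through the example in the comment above for
   (VTYPE = V16QI, NELTS = 4): PBSIZE = 128 / 4 = 32 bits, so
   INELTS = 32 / 8 = 4 and the first path asks whether a V16QI can be
   built by vec_init from four V4QI pieces; failing that, the integer
   fallback looks for a 32-bit scalar mode and asks whether a V4SI can
   be built from four SImode pieces, returning V4SI with *PTYPE = SI.  */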
2110 /* A subroutine of get_load_store_type, with a subset of the same
2111 arguments. Handle the case where STMT_INFO is part of a grouped load
2112 or store.
2114 For stores, the statements in the group are all consecutive
2115 and there is no gap at the end. For loads, the statements in the
2116 group might not be consecutive; there can be gaps between statements
2117 as well as at the end. */
2119 static bool
2120 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2121 tree vectype, slp_tree slp_node,
2122 bool masked_p, vec_load_store_type vls_type,
2123 vect_memory_access_type *memory_access_type,
2124 poly_int64 *poffset,
2125 dr_alignment_support *alignment_support_scheme,
2126 int *misalignment,
2127 gather_scatter_info *gs_info)
2129 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2130 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2131 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2132 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2133 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2134 bool single_element_p = (stmt_info == first_stmt_info
2135 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2136 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2137 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2139 /* True if the vectorized statements would access beyond the last
2140 statement in the group. */
2141 bool overrun_p = false;
2143 /* True if we can cope with such overrun by peeling for gaps, so that
2144 there is at least one final scalar iteration after the vector loop. */
2145 bool can_overrun_p = (!masked_p
2146 && vls_type == VLS_LOAD
2147 && loop_vinfo
2148 && !loop->inner);
2150 /* There can only be a gap at the end of the group if the stride is
2151 known at compile time. */
2152 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2154 /* Stores can't yet have gaps. */
2155 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2157 if (slp_node)
2159 /* For SLP vectorization we directly vectorize a subchain
2160 without permutation. */
2161 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2162 first_dr_info
2163 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2164 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2166 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2167 separated by the stride, until we have a complete vector.
2168 Fall back to scalar accesses if that isn't possible. */
2169 if (multiple_p (nunits, group_size))
2170 *memory_access_type = VMAT_STRIDED_SLP;
2171 else
2172 *memory_access_type = VMAT_ELEMENTWISE;
2174 else
2176 overrun_p = loop_vinfo && gap != 0;
2177 if (overrun_p && vls_type != VLS_LOAD)
2179 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2180 "Grouped store with gaps requires"
2181 " non-consecutive accesses\n");
2182 return false;
2184 /* An overrun is fine if the trailing elements are smaller
2185 than the alignment boundary B. Every vector access will
2186 be a multiple of B and so we are guaranteed to access a
2187 non-gap element in the same B-sized block. */
2188 if (overrun_p
2189 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2190 vectype)
2191 / vect_get_scalar_dr_size (first_dr_info)))
2192 overrun_p = false;
2194 /* If the gap splits the vector in half and the target
2195 can do half-vector operations avoid the epilogue peeling
2196 by simply loading half of the vector only. Usually
2197 the construction with an upper zero half will be elided. */
2198 dr_alignment_support alss;
2199 int misalign = dr_misalignment (first_dr_info, vectype);
2200 tree half_vtype;
2201 if (overrun_p
2202 && !masked_p
2203 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2204 vectype, misalign)))
2205 == dr_aligned
2206 || alss == dr_unaligned_supported)
2207 && known_eq (nunits, (group_size - gap) * 2)
2208 && known_eq (nunits, group_size)
2209 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2210 != NULL_TREE))
2211 overrun_p = false;
2213 if (overrun_p && !can_overrun_p)
2215 if (dump_enabled_p ())
2216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2217 "Peeling for outer loop is not supported\n");
2218 return false;
2220 int cmp = compare_step_with_zero (vinfo, stmt_info);
2221 if (cmp < 0)
2223 if (single_element_p)
2224 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2225 only correct for single element "interleaving" SLP. */
2226 *memory_access_type = get_negative_load_store_type
2227 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2228 else
2230 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2231 separated by the stride, until we have a complete vector.
2232 Fall back to scalar accesses if that isn't possible. */
2233 if (multiple_p (nunits, group_size))
2234 *memory_access_type = VMAT_STRIDED_SLP;
2235 else
2236 *memory_access_type = VMAT_ELEMENTWISE;
2239 else
2241 gcc_assert (!loop_vinfo || cmp > 0);
2242 *memory_access_type = VMAT_CONTIGUOUS;
2246 else
2248 /* We can always handle this case using elementwise accesses,
2249 but see if something more efficient is available. */
2250 *memory_access_type = VMAT_ELEMENTWISE;
2252 /* If there is a gap at the end of the group then these optimizations
2253 would access excess elements in the last iteration. */
2254 bool would_overrun_p = (gap != 0);
2255 /* An overrun is fine if the trailing elements are smaller than the
2256 alignment boundary B. Every vector access will be a multiple of B
2257 and so we are guaranteed to access a non-gap element in the
2258 same B-sized block. */
2259 if (would_overrun_p
2260 && !masked_p
2261 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2262 / vect_get_scalar_dr_size (first_dr_info)))
2263 would_overrun_p = false;
2265 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2266 && (can_overrun_p || !would_overrun_p)
2267 && compare_step_with_zero (vinfo, stmt_info) > 0)
2269 /* First cope with the degenerate case of a single-element
2270 vector. */
2271 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2274 /* Otherwise try using LOAD/STORE_LANES. */
2275 else if (vls_type == VLS_LOAD
2276 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2277 : vect_store_lanes_supported (vectype, group_size,
2278 masked_p))
2280 *memory_access_type = VMAT_LOAD_STORE_LANES;
2281 overrun_p = would_overrun_p;
2284 /* If that fails, try using permuting loads. */
2285 else if (vls_type == VLS_LOAD
2286 ? vect_grouped_load_supported (vectype, single_element_p,
2287 group_size)
2288 : vect_grouped_store_supported (vectype, group_size))
2290 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2291 overrun_p = would_overrun_p;
2295   /* As a last resort, try using a gather load or scatter store.
2297 ??? Although the code can handle all group sizes correctly,
2298 it probably isn't a win to use separate strided accesses based
2299 on nearby locations. Or, even if it's a win over scalar code,
2300 it might not be a win over vectorizing at a lower VF, if that
2301 allows us to use contiguous accesses. */
2302 if (*memory_access_type == VMAT_ELEMENTWISE
2303 && single_element_p
2304 && loop_vinfo
2305 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2306 masked_p, gs_info))
2307 *memory_access_type = VMAT_GATHER_SCATTER;
2310 if (*memory_access_type == VMAT_GATHER_SCATTER
2311 || *memory_access_type == VMAT_ELEMENTWISE)
2313 *alignment_support_scheme = dr_unaligned_supported;
2314 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2316 else
2318 *misalignment = dr_misalignment (first_dr_info, vectype);
2319 *alignment_support_scheme
2320 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2321 *misalignment);
2324 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2326 /* STMT is the leader of the group. Check the operands of all the
2327 stmts of the group. */
2328 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2329 while (next_stmt_info)
2331 tree op = vect_get_store_rhs (next_stmt_info);
2332 enum vect_def_type dt;
2333 if (!vect_is_simple_use (op, vinfo, &dt))
2335 if (dump_enabled_p ())
2336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2337 "use not simple.\n");
2338 return false;
2340 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2344 if (overrun_p)
2346 gcc_assert (can_overrun_p);
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2349 "Data access with gaps requires scalar "
2350 "epilogue loop\n");
2351 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2354 return true;
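/* For illustration of the half-vector case handled above: a
   single-vector SLP load with VECTYPE = V4SI, GROUP_SIZE = 4 and a
   trailing GAP of 2 satisfies (4 - 2) * 2 == nunits, so rather than
   peeling an epilogue for the gap we can load just a V2SI half and let
   the construction with a zero upper half be elided.  */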
2357 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2358 if there is a memory access type that the vectorized form can use,
2359 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2360 or scatters, fill in GS_INFO accordingly. In addition
2361 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2362 the target does not support the alignment scheme. *MISALIGNMENT
2363 is set according to the alignment of the access (including
2364 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2366 SLP says whether we're performing SLP rather than loop vectorization.
2367 MASKED_P is true if the statement is conditional on a vectorized mask.
2368 VECTYPE is the vector type that the vectorized statements will use.
2369 NCOPIES is the number of vector statements that will be needed. */
2371 static bool
2372 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2373 tree vectype, slp_tree slp_node,
2374 bool masked_p, vec_load_store_type vls_type,
2375 unsigned int ncopies,
2376 vect_memory_access_type *memory_access_type,
2377 poly_int64 *poffset,
2378 dr_alignment_support *alignment_support_scheme,
2379 int *misalignment,
2380 gather_scatter_info *gs_info)
2382 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2383 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2384 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2385 *poffset = 0;
2386 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2388 *memory_access_type = VMAT_GATHER_SCATTER;
2389 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2390 gcc_unreachable ();
2391 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2392 &gs_info->offset_dt,
2393 &gs_info->offset_vectype))
2395 if (dump_enabled_p ())
2396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2397 "%s index use not simple.\n",
2398 vls_type == VLS_LOAD ? "gather" : "scatter");
2399 return false;
2401 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2403 if (vls_type != VLS_LOAD)
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "unsupported emulated scatter.\n");
2408 return false;
2410 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2411 || !TYPE_VECTOR_SUBPARTS
2412 (gs_info->offset_vectype).is_constant ()
2413 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2414 (gs_info->offset_vectype),
2415 TYPE_VECTOR_SUBPARTS (vectype)))
2417 if (dump_enabled_p ())
2418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2419 "unsupported vector types for emulated "
2420 "gather.\n");
2421 return false;
2424 /* Gather-scatter accesses perform only component accesses, alignment
2425 is irrelevant for them. */
2426 *alignment_support_scheme = dr_unaligned_supported;
2428 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2430 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2431 masked_p,
2432 vls_type, memory_access_type, poffset,
2433 alignment_support_scheme,
2434 misalignment, gs_info))
2435 return false;
2437 else if (STMT_VINFO_STRIDED_P (stmt_info))
2439 gcc_assert (!slp_node);
2440 if (loop_vinfo
2441 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2442 masked_p, gs_info))
2443 *memory_access_type = VMAT_GATHER_SCATTER;
2444 else
2445 *memory_access_type = VMAT_ELEMENTWISE;
2446 /* Alignment is irrelevant here. */
2447 *alignment_support_scheme = dr_unaligned_supported;
2449 else
2451 int cmp = compare_step_with_zero (vinfo, stmt_info);
2452 if (cmp == 0)
2454 gcc_assert (vls_type == VLS_LOAD);
2455 *memory_access_type = VMAT_INVARIANT;
2456 /* Invariant accesses perform only component accesses, alignment
2457 is irrelevant for them. */
2458 *alignment_support_scheme = dr_unaligned_supported;
2460 else
2462 if (cmp < 0)
2463 *memory_access_type = get_negative_load_store_type
2464 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2465 else
2466 *memory_access_type = VMAT_CONTIGUOUS;
2467 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2468 vectype);
2469 *alignment_support_scheme
2470 = vect_supportable_dr_alignment (vinfo,
2471 STMT_VINFO_DR_INFO (stmt_info),
2472 vectype, *misalignment);
2476 if ((*memory_access_type == VMAT_ELEMENTWISE
2477 || *memory_access_type == VMAT_STRIDED_SLP)
2478 && !nunits.is_constant ())
2480 if (dump_enabled_p ())
2481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2482 "Not using elementwise accesses due to variable "
2483 "vectorization factor.\n");
2484 return false;
2487 if (*alignment_support_scheme == dr_unaligned_unsupported)
2489 if (dump_enabled_p ())
2490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2491 "unsupported unaligned access\n");
2492 return false;
2495 /* FIXME: At the moment the cost model seems to underestimate the
2496 cost of using elementwise accesses. This check preserves the
2497 traditional behavior until that can be fixed. */
2498 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2499 if (!first_stmt_info)
2500 first_stmt_info = stmt_info;
2501 if (*memory_access_type == VMAT_ELEMENTWISE
2502 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2503 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2504 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2505 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2507 if (dump_enabled_p ())
2508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2509 "not falling back to elementwise accesses\n");
2510 return false;
2512 return true;
2515 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2516 conditional operation STMT_INFO. When returning true, store the mask
2517 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2518 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2519 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2521 static bool
2522 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2523 slp_tree slp_node, unsigned mask_index,
2524 tree *mask, slp_tree *mask_node,
2525 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2527 enum vect_def_type mask_dt;
2528 tree mask_vectype;
2529 slp_tree mask_node_1;
2530 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2531 mask, &mask_node_1, &mask_dt, &mask_vectype))
2533 if (dump_enabled_p ())
2534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2535 "mask use not simple.\n");
2536 return false;
2539 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2541 if (dump_enabled_p ())
2542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2543 "mask argument is not a boolean.\n");
2544 return false;
2547 /* If the caller is not prepared for adjusting an external/constant
2548 SLP mask vector type fail. */
2549 if (slp_node
2550 && !mask_node
2551 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2553 if (dump_enabled_p ())
2554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2555 "SLP mask argument is not vectorized.\n");
2556 return false;
2559 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2560 if (!mask_vectype)
2561 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2563 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2565 if (dump_enabled_p ())
2566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2567 "could not find an appropriate vector mask type.\n");
2568 return false;
2571 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2572 TYPE_VECTOR_SUBPARTS (vectype)))
2574 if (dump_enabled_p ())
2575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2576 "vector mask type %T"
2577 " does not match vector data type %T.\n",
2578 mask_vectype, vectype);
2580 return false;
2583 *mask_dt_out = mask_dt;
2584 *mask_vectype_out = mask_vectype;
2585 if (mask_node)
2586 *mask_node = mask_node_1;
2587 return true;
2590 /* Return true if stored value RHS is suitable for vectorizing store
2591 statement STMT_INFO. When returning true, store the type of the
2592 definition in *RHS_DT_OUT, the type of the vectorized store value in
2593 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2595 static bool
2596 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2597 slp_tree slp_node, tree rhs,
2598 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2599 vec_load_store_type *vls_type_out)
2601 /* In the case this is a store from a constant make sure
2602 native_encode_expr can handle it. */
2603 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2605 if (dump_enabled_p ())
2606 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2607 "cannot encode constant as a byte sequence.\n");
2608 return false;
2611 unsigned op_no = 0;
2612 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2614 if (gimple_call_internal_p (call)
2615 && internal_store_fn_p (gimple_call_internal_fn (call)))
2616 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2619 enum vect_def_type rhs_dt;
2620 tree rhs_vectype;
2621 slp_tree slp_op;
2622 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2623 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2625 if (dump_enabled_p ())
2626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2627 "use not simple.\n");
2628 return false;
2631 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2632 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2634 if (dump_enabled_p ())
2635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2636 "incompatible vector types.\n");
2637 return false;
2640 *rhs_dt_out = rhs_dt;
2641 *rhs_vectype_out = rhs_vectype;
2642 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2643 *vls_type_out = VLS_STORE_INVARIANT;
2644 else
2645 *vls_type_out = VLS_STORE;
2646 return true;
2649 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2650 Note that we support masks with floating-point type, in which case the
2651 floats are interpreted as a bitmask. */
2653 static tree
2654 vect_build_all_ones_mask (vec_info *vinfo,
2655 stmt_vec_info stmt_info, tree masktype)
2657 if (TREE_CODE (masktype) == INTEGER_TYPE)
2658 return build_int_cst (masktype, -1);
2659 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2661 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2662 mask = build_vector_from_val (masktype, mask);
2663 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2665 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2667 REAL_VALUE_TYPE r;
2668 long tmp[6];
2669 for (int j = 0; j < 6; ++j)
2670 tmp[j] = -1;
2671 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2672 tree mask = build_real (TREE_TYPE (masktype), r);
2673 mask = build_vector_from_val (masktype, mask);
2674 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2676 gcc_unreachable ();
2679 /* Build an all-zero merge value of type VECTYPE while vectorizing
2680 STMT_INFO as a gather load. */
2682 static tree
2683 vect_build_zero_merge_argument (vec_info *vinfo,
2684 stmt_vec_info stmt_info, tree vectype)
2686 tree merge;
2687 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2688 merge = build_int_cst (TREE_TYPE (vectype), 0);
2689 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2691 REAL_VALUE_TYPE r;
2692 long tmp[6];
2693 for (int j = 0; j < 6; ++j)
2694 tmp[j] = 0;
2695 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2696 merge = build_real (TREE_TYPE (vectype), r);
2698 else
2699 gcc_unreachable ();
2700 merge = build_vector_from_val (vectype, merge);
2701 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2704 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2705 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2706 the gather load operation. If the load is conditional, MASK is the
2707 unvectorized condition and MASK_DT is its definition type, otherwise
2708 MASK is null. */
2710 static void
2711 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2712 gimple_stmt_iterator *gsi,
2713 gimple **vec_stmt,
2714 gather_scatter_info *gs_info,
2715 tree mask)
2717 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2718 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2719 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2720 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2721 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2722 edge pe = loop_preheader_edge (loop);
2723 enum { NARROW, NONE, WIDEN } modifier;
2724 poly_uint64 gather_off_nunits
2725 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2727 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2728 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2729 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2730 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2731 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2732 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2733 tree scaletype = TREE_VALUE (arglist);
2734 tree real_masktype = masktype;
2735 gcc_checking_assert (types_compatible_p (srctype, rettype)
2736 && (!mask
2737 || TREE_CODE (masktype) == INTEGER_TYPE
2738 || types_compatible_p (srctype, masktype)));
2739 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2740 masktype = truth_type_for (srctype);
2742 tree mask_halftype = masktype;
2743 tree perm_mask = NULL_TREE;
2744 tree mask_perm_mask = NULL_TREE;
2745 if (known_eq (nunits, gather_off_nunits))
2746 modifier = NONE;
2747 else if (known_eq (nunits * 2, gather_off_nunits))
2749 modifier = WIDEN;
2751 /* Currently widening gathers and scatters are only supported for
2752 fixed-length vectors. */
2753 int count = gather_off_nunits.to_constant ();
2754 vec_perm_builder sel (count, count, 1);
2755 for (int i = 0; i < count; ++i)
2756 sel.quick_push (i | (count / 2));
2758 vec_perm_indices indices (sel, 1, count);
2759 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2760 indices);
2762 else if (known_eq (nunits, gather_off_nunits * 2))
2764 modifier = NARROW;
2766 /* Currently narrowing gathers and scatters are only supported for
2767 fixed-length vectors. */
2768 int count = nunits.to_constant ();
2769 vec_perm_builder sel (count, count, 1);
2770 sel.quick_grow (count);
2771 for (int i = 0; i < count; ++i)
2772 sel[i] = i < count / 2 ? i : i + count / 2;
2773 vec_perm_indices indices (sel, 2, count);
2774 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
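      /* For illustration, with COUNT = 8 the selector built above is
	 { 0, 1, 2, 3, 8, 9, 10, 11 }: the low halves of its two input
	 vectors concatenated.  It is used later to glue two narrow
	 gather results back into one full-width vector.  */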
2776 ncopies *= 2;
2778 if (mask && masktype == real_masktype)
2780 for (int i = 0; i < count; ++i)
2781 sel[i] = i | (count / 2);
2782 indices.new_vector (sel, 2, count);
2783 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2785 else if (mask)
2786 mask_halftype = truth_type_for (gs_info->offset_vectype);
2788 else
2789 gcc_unreachable ();
2791 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2792 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2794 tree ptr = fold_convert (ptrtype, gs_info->base);
2795 if (!is_gimple_min_invariant (ptr))
2797 gimple_seq seq;
2798 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2799 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2800 gcc_assert (!new_bb);
2803 tree scale = build_int_cst (scaletype, gs_info->scale);
2805 tree vec_oprnd0 = NULL_TREE;
2806 tree vec_mask = NULL_TREE;
2807 tree src_op = NULL_TREE;
2808 tree mask_op = NULL_TREE;
2809 tree prev_res = NULL_TREE;
2811 if (!mask)
2813 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2814 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2817 auto_vec<tree> vec_oprnds0;
2818 auto_vec<tree> vec_masks;
2819 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2820 modifier == WIDEN ? ncopies / 2 : ncopies,
2821 gs_info->offset, &vec_oprnds0);
2822 if (mask)
2823 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2824 modifier == NARROW ? ncopies / 2 : ncopies,
2825 mask, &vec_masks, masktype);
2826 for (int j = 0; j < ncopies; ++j)
2828 tree op, var;
2829 if (modifier == WIDEN && (j & 1))
2830 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2831 perm_mask, stmt_info, gsi);
2832 else
2833 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2835 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2837 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2838 TYPE_VECTOR_SUBPARTS (idxtype)));
2839 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2840 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2841 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2842 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2843 op = var;
2846 if (mask)
2848 if (mask_perm_mask && (j & 1))
2849 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2850 mask_perm_mask, stmt_info, gsi);
2851 else
2853 if (modifier == NARROW)
2855 if ((j & 1) == 0)
2856 vec_mask = vec_masks[j / 2];
2858 else
2859 vec_mask = vec_masks[j];
2861 mask_op = vec_mask;
2862 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2864 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2865 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2866 gcc_assert (known_eq (sub1, sub2));
2867 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2868 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2869 gassign *new_stmt
2870 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2871 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2872 mask_op = var;
2875 if (modifier == NARROW && masktype != real_masktype)
2877 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2878 gassign *new_stmt
2879 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2880 : VEC_UNPACK_LO_EXPR,
2881 mask_op);
2882 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2883 mask_op = var;
2885 src_op = mask_op;
2888 tree mask_arg = mask_op;
2889 if (masktype != real_masktype)
2891 tree utype, optype = TREE_TYPE (mask_op);
2892 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2893 utype = real_masktype;
2894 else
2895 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2896 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2897 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2898 gassign *new_stmt
2899 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2900 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2901 mask_arg = var;
2902 if (!useless_type_conversion_p (real_masktype, utype))
2904 gcc_assert (TYPE_PRECISION (utype)
2905 <= TYPE_PRECISION (real_masktype));
2906 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2907 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2908 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2909 mask_arg = var;
2911 src_op = build_zero_cst (srctype);
2913 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2914 mask_arg, scale);
2916 if (!useless_type_conversion_p (vectype, rettype))
2918 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2919 TYPE_VECTOR_SUBPARTS (rettype)));
2920 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2921 gimple_call_set_lhs (new_stmt, op);
2922 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2923 var = make_ssa_name (vec_dest);
2924 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2925 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2926 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2928 else
2930 var = make_ssa_name (vec_dest, new_stmt);
2931 gimple_call_set_lhs (new_stmt, var);
2932 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2935 if (modifier == NARROW)
2937 if ((j & 1) == 0)
2939 prev_res = var;
2940 continue;
2942 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2943 stmt_info, gsi);
2944 new_stmt = SSA_NAME_DEF_STMT (var);
2947 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2949 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2952 /* Prepare the base and offset in GS_INFO for vectorization.
2953 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2954 to the vectorized offset argument for the first copy of STMT_INFO.
2955 STMT_INFO is the statement described by GS_INFO and LOOP is the
2956 containing loop. */
2958 static void
2959 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2960 class loop *loop, stmt_vec_info stmt_info,
2961 gather_scatter_info *gs_info,
2962 tree *dataref_ptr, vec<tree> *vec_offset)
2964 gimple_seq stmts = NULL;
2965 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2966 if (stmts != NULL)
2968 basic_block new_bb;
2969 edge pe = loop_preheader_edge (loop);
2970 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2971 gcc_assert (!new_bb);
2973 unsigned ncopies = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2974 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2975 gs_info->offset, vec_offset,
2976 gs_info->offset_vectype);
2979 /* Prepare to implement a grouped or strided load or store using
2980 the gather load or scatter store operation described by GS_INFO.
2981 STMT_INFO is the load or store statement.
2983 Set *DATAREF_BUMP to the amount that should be added to the base
2984 address after each copy of the vectorized statement. Set *VEC_OFFSET
2985 to an invariant offset vector in which element I has the value
2986 I * DR_STEP / SCALE. */
2988 static void
2989 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2990 loop_vec_info loop_vinfo,
2991 gather_scatter_info *gs_info,
2992 tree *dataref_bump, tree *vec_offset)
2994 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2995 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2997 tree bump = size_binop (MULT_EXPR,
2998 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2999 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3000 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3002 /* The offset given in GS_INFO can have pointer type, so use the element
3003 type of the vector instead. */
3004 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3006 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3007 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3008 ssize_int (gs_info->scale));
3009 step = fold_convert (offset_type, step);
3011 /* Create {0, X, X*2, X*3, ...}. */
3012 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3013 build_zero_cst (offset_type), step);
3014 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
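/* A worked example with made-up numbers: for VECTYPE = V4SI,
   DR_STEP = 8 bytes and SCALE = 4, *DATAREF_BUMP is 8 * 4 = 32 bytes
   per copy, X = 8 / 4 = 2 and *VEC_OFFSET is { 0, 2, 4, 6 }, so the
   gather or scatter addresses base + { 0, 8, 16, 24 }.  */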
3017 /* Return the amount that should be added to a vector pointer to move
3018 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3019 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3020 vectorization. */
3022 static tree
3023 vect_get_data_ptr_increment (vec_info *vinfo,
3024 dr_vec_info *dr_info, tree aggr_type,
3025 vect_memory_access_type memory_access_type)
3027 if (memory_access_type == VMAT_INVARIANT)
3028 return size_zero_node;
3030 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3031 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3032 if (tree_int_cst_sgn (step) == -1)
3033 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3034 return iv_step;
3037 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3039 static bool
3040 vectorizable_bswap (vec_info *vinfo,
3041 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3042 gimple **vec_stmt, slp_tree slp_node,
3043 slp_tree *slp_op,
3044 tree vectype_in, stmt_vector_for_cost *cost_vec)
3046 tree op, vectype;
3047 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3048 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3049 unsigned ncopies;
3051 op = gimple_call_arg (stmt, 0);
3052 vectype = STMT_VINFO_VECTYPE (stmt_info);
3053 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3055 /* Multiple types in SLP are handled by creating the appropriate number of
3056 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3057 case of SLP. */
3058 if (slp_node)
3059 ncopies = 1;
3060 else
3061 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3063 gcc_assert (ncopies >= 1);
3065 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3066 if (! char_vectype)
3067 return false;
3069 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3070 unsigned word_bytes;
3071 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3072 return false;
3074 /* The encoding uses one stepped pattern for each byte in the word. */
3075 vec_perm_builder elts (num_bytes, word_bytes, 3);
3076 for (unsigned i = 0; i < 3; ++i)
3077 for (unsigned j = 0; j < word_bytes; ++j)
3078 elts.quick_push ((i + 1) * word_bytes - j - 1);
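  /* For illustration, for a 32-bit word (WORD_BYTES = 4) the elements
     pushed above are { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, which
     extend to the byte-swapping selector { 3, 2, 1, 0, 7, 6, 5, 4, ... }
     over the whole vector.  */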
3080 vec_perm_indices indices (elts, 1, num_bytes);
3081 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3082 return false;
3084 if (! vec_stmt)
3086 if (slp_node
3087 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3089 if (dump_enabled_p ())
3090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3091 "incompatible vector types for invariants\n");
3092 return false;
3095 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3096 DUMP_VECT_SCOPE ("vectorizable_bswap");
3097 record_stmt_cost (cost_vec,
3098 1, vector_stmt, stmt_info, 0, vect_prologue);
3099 record_stmt_cost (cost_vec,
3100 slp_node
3101 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3102 vec_perm, stmt_info, 0, vect_body);
3103 return true;
3106 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3108 /* Transform. */
3109 vec<tree> vec_oprnds = vNULL;
3110 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3111 op, &vec_oprnds);
3112   /* Arguments are ready.  Create the new vector stmt.  */
3113 unsigned i;
3114 tree vop;
3115 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3117 gimple *new_stmt;
3118 tree tem = make_ssa_name (char_vectype);
3119 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3120 char_vectype, vop));
3121 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3122 tree tem2 = make_ssa_name (char_vectype);
3123 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3124 tem, tem, bswap_vconst);
3125 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3126 tem = make_ssa_name (vectype);
3127 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3128 vectype, tem2));
3129 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3130 if (slp_node)
3131 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3132 else
3133 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3136 if (!slp_node)
3137 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3139 vec_oprnds.release ();
3140 return true;
3143 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3144 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3145 in a single step. On success, store the binary pack code in
3146 *CONVERT_CODE. */
3148 static bool
3149 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3150 tree_code *convert_code)
3152 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3153 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3154 return false;
3156 tree_code code;
3157 int multi_step_cvt = 0;
3158 auto_vec <tree, 8> interm_types;
3159 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3160 &code, &multi_step_cvt, &interm_types)
3161 || multi_step_cvt)
3162 return false;
3164 *convert_code = code;
3165 return true;
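/* For example, narrowing V2DI results to a V4SI output is a single-step
   narrowing when the target supports the corresponding pack operation
   (typically VEC_PACK_TRUNC_EXPR); vectorizable_call then combines each
   pair of half-width call results with *CONVERT_CODE.  */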
3168 /* Function vectorizable_call.
3170 Check if STMT_INFO performs a function call that can be vectorized.
3171 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3172 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3173 Return true if STMT_INFO is vectorizable in this way. */
3175 static bool
3176 vectorizable_call (vec_info *vinfo,
3177 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3178 gimple **vec_stmt, slp_tree slp_node,
3179 stmt_vector_for_cost *cost_vec)
3181 gcall *stmt;
3182 tree vec_dest;
3183 tree scalar_dest;
3184 tree op;
3185 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3186 tree vectype_out, vectype_in;
3187 poly_uint64 nunits_in;
3188 poly_uint64 nunits_out;
3189 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3190 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3191 tree fndecl, new_temp, rhs_type;
3192 enum vect_def_type dt[4]
3193 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3194 vect_unknown_def_type };
3195 tree vectypes[ARRAY_SIZE (dt)] = {};
3196 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3197 int ndts = ARRAY_SIZE (dt);
3198 int ncopies, j;
3199 auto_vec<tree, 8> vargs;
3200 auto_vec<tree, 8> orig_vargs;
3201 enum { NARROW, NONE, WIDEN } modifier;
3202 size_t i, nargs;
3203 tree lhs;
3205 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3206 return false;
3208 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3209 && ! vec_stmt)
3210 return false;
3212 /* Is STMT_INFO a vectorizable call? */
3213 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3214 if (!stmt)
3215 return false;
3217 if (gimple_call_internal_p (stmt)
3218 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3219 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3220 /* Handled by vectorizable_load and vectorizable_store. */
3221 return false;
3223 if (gimple_call_lhs (stmt) == NULL_TREE
3224 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3225 return false;
3227 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3229 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3231 /* Process function arguments. */
3232 rhs_type = NULL_TREE;
3233 vectype_in = NULL_TREE;
3234 nargs = gimple_call_num_args (stmt);
3236   /* Bail out if the function has more than four arguments; we do not have
3237      interesting builtin functions to vectorize with more than two arguments,
3238      except for fma.  Having no arguments is also not good.  */
3239 if (nargs == 0 || nargs > 4)
3240 return false;
3242 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3243 combined_fn cfn = gimple_call_combined_fn (stmt);
3244 if (cfn == CFN_GOMP_SIMD_LANE)
3246 nargs = 0;
3247 rhs_type = unsigned_type_node;
3250 int mask_opno = -1;
3251 if (internal_fn_p (cfn))
3252 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3254 for (i = 0; i < nargs; i++)
3256 if ((int) i == mask_opno)
3258 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3259 &op, &slp_op[i], &dt[i], &vectypes[i]))
3260 return false;
3261 continue;
3264 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3265 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3267 if (dump_enabled_p ())
3268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3269 "use not simple.\n");
3270 return false;
3273 /* We can only handle calls with arguments of the same type. */
3274 if (rhs_type
3275 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3277 if (dump_enabled_p ())
3278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3279 "argument types differ.\n");
3280 return false;
3282 if (!rhs_type)
3283 rhs_type = TREE_TYPE (op);
3285 if (!vectype_in)
3286 vectype_in = vectypes[i];
3287 else if (vectypes[i]
3288 && !types_compatible_p (vectypes[i], vectype_in))
3290 if (dump_enabled_p ())
3291 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3292 "argument vector types differ.\n");
3293 return false;
3296 /* If all arguments are external or constant defs, infer the vector type
3297 from the scalar type. */
3298 if (!vectype_in)
3299 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3300 if (vec_stmt)
3301 gcc_assert (vectype_in);
3302 if (!vectype_in)
3304 if (dump_enabled_p ())
3305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3306 "no vectype for scalar type %T\n", rhs_type);
3308 return false;
3310 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3311 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3312 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3313 by a pack of the two vectors into an SI vector. We would need
3314 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3315 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3317 if (dump_enabled_p ())
3318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3319 "mismatched vector sizes %T and %T\n",
3320 vectype_in, vectype_out);
3321 return false;
3324 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3325 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3327 if (dump_enabled_p ())
3328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3329 "mixed mask and nonmask vector types\n");
3330 return false;
3333 /* FORNOW */
3334 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3335 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3336 if (known_eq (nunits_in * 2, nunits_out))
3337 modifier = NARROW;
3338 else if (known_eq (nunits_out, nunits_in))
3339 modifier = NONE;
3340 else if (known_eq (nunits_out * 2, nunits_in))
3341 modifier = WIDEN;
3342 else
3343 return false;
3345 /* We only handle functions that do not read or clobber memory. */
3346 if (gimple_vuse (stmt))
3348 if (dump_enabled_p ())
3349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3350 "function reads from or writes to memory.\n");
3351 return false;
3354 /* For now, we only vectorize functions if a target specific builtin
3355 is available. TODO -- in some cases, it might be profitable to
3356 insert the calls for pieces of the vector, in order to be able
3357 to vectorize other operations in the loop. */
3358 fndecl = NULL_TREE;
3359 internal_fn ifn = IFN_LAST;
3360 tree callee = gimple_call_fndecl (stmt);
3362 /* First try using an internal function. */
3363 tree_code convert_code = ERROR_MARK;
3364 if (cfn != CFN_LAST
3365 && (modifier == NONE
3366 || (modifier == NARROW
3367 && simple_integer_narrowing (vectype_out, vectype_in,
3368 &convert_code))))
3369 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3370 vectype_in);
3372 /* If that fails, try asking for a target-specific built-in function. */
3373 if (ifn == IFN_LAST)
3375 if (cfn != CFN_LAST)
3376 fndecl = targetm.vectorize.builtin_vectorized_function
3377 (cfn, vectype_out, vectype_in);
3378 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3379 fndecl = targetm.vectorize.builtin_md_vectorized_function
3380 (callee, vectype_out, vectype_in);
3383 if (ifn == IFN_LAST && !fndecl)
3385 if (cfn == CFN_GOMP_SIMD_LANE
3386 && !slp_node
3387 && loop_vinfo
3388 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3389 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3390 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3391 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3393 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3394 { 0, 1, 2, ... vf - 1 } vector. */
3395 gcc_assert (nargs == 0);
3397 else if (modifier == NONE
3398 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3399 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3400 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3401 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3402 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3403 slp_op, vectype_in, cost_vec);
3404 else
3406 if (dump_enabled_p ())
3407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3408 "function is not vectorizable.\n");
3409 return false;
3413 if (slp_node)
3414 ncopies = 1;
3415 else if (modifier == NARROW && ifn == IFN_LAST)
3416 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3417 else
3418 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3420 /* Sanity check: make sure that at least one copy of the vectorized stmt
3421 needs to be generated. */
3422 gcc_assert (ncopies >= 1);
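/* For example, with a vectorization factor of 8 and 4-element vectors
   (figures assumed for illustration), vect_get_num_copies yields
   8 / 4 = 2, i.e. two vector calls are generated per scalar call.  */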
3424 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3425 if (!vec_stmt) /* transformation not required. */
3427 if (slp_node)
3428 for (i = 0; i < nargs; ++i)
3429 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3431 if (dump_enabled_p ())
3432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3433 "incompatible vector types for invariants\n");
3434 return false;
3436 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3437 DUMP_VECT_SCOPE ("vectorizable_call");
3438 vect_model_simple_cost (vinfo, stmt_info,
3439 ncopies, dt, ndts, slp_node, cost_vec);
3440 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3441 record_stmt_cost (cost_vec, ncopies / 2,
3442 vec_promote_demote, stmt_info, 0, vect_body);
3444 if (loop_vinfo && mask_opno >= 0)
3446 unsigned int nvectors = (slp_node
3447 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3448 : ncopies);
3449 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3450 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3451 vectype_out, scalar_mask);
3453 return true;
3456 /* Transform. */
3458 if (dump_enabled_p ())
3459 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3461 /* Handle def. */
3462 scalar_dest = gimple_call_lhs (stmt);
3463 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3465 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3467 if (modifier == NONE || ifn != IFN_LAST)
3469 tree prev_res = NULL_TREE;
3470 vargs.safe_grow (nargs, true);
3471 orig_vargs.safe_grow (nargs, true);
3472 auto_vec<vec<tree> > vec_defs (nargs);
3473 for (j = 0; j < ncopies; ++j)
3475 /* Build argument list for the vectorized call. */
3476 if (slp_node)
3478 vec<tree> vec_oprnds0;
3480 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3481 vec_oprnds0 = vec_defs[0];
3483 /* Arguments are ready. Create the new vector stmt. */
3484 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3486 size_t k;
3487 for (k = 0; k < nargs; k++)
3489 vec<tree> vec_oprndsk = vec_defs[k];
3490 vargs[k] = vec_oprndsk[i];
3492 gimple *new_stmt;
3493 if (modifier == NARROW)
3495 /* We don't define any narrowing conditional functions
3496 at present. */
3497 gcc_assert (mask_opno < 0);
3498 tree half_res = make_ssa_name (vectype_in);
3499 gcall *call
3500 = gimple_build_call_internal_vec (ifn, vargs);
3501 gimple_call_set_lhs (call, half_res);
3502 gimple_call_set_nothrow (call, true);
3503 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3504 if ((i & 1) == 0)
3506 prev_res = half_res;
3507 continue;
3509 new_temp = make_ssa_name (vec_dest);
3510 new_stmt = gimple_build_assign (new_temp, convert_code,
3511 prev_res, half_res);
3512 vect_finish_stmt_generation (vinfo, stmt_info,
3513 new_stmt, gsi);
3515 else
3517 if (mask_opno >= 0 && masked_loop_p)
3519 unsigned int vec_num = vec_oprnds0.length ();
3520 /* Always true for SLP. */
3521 gcc_assert (ncopies == 1);
3522 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3523 vectype_out, i);
3524 vargs[mask_opno] = prepare_load_store_mask
3525 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3528 gcall *call;
3529 if (ifn != IFN_LAST)
3530 call = gimple_build_call_internal_vec (ifn, vargs);
3531 else
3532 call = gimple_build_call_vec (fndecl, vargs);
3533 new_temp = make_ssa_name (vec_dest, call);
3534 gimple_call_set_lhs (call, new_temp);
3535 gimple_call_set_nothrow (call, true);
3536 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3537 new_stmt = call;
3539 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3541 continue;
3544 for (i = 0; i < nargs; i++)
3546 op = gimple_call_arg (stmt, i);
3547 if (j == 0)
3549 vec_defs.quick_push (vNULL);
3550 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3551 op, &vec_defs[i],
3552 vectypes[i]);
3554 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3557 if (mask_opno >= 0 && masked_loop_p)
3559 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3560 vectype_out, j);
3561 vargs[mask_opno]
3562 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3563 vargs[mask_opno], gsi);
3566 gimple *new_stmt;
3567 if (cfn == CFN_GOMP_SIMD_LANE)
3569 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3570 tree new_var
3571 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3572 gimple *init_stmt = gimple_build_assign (new_var, cst);
3573 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3574 new_temp = make_ssa_name (vec_dest);
3575 new_stmt = gimple_build_assign (new_temp, new_var);
3576 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3578 else if (modifier == NARROW)
3580 /* We don't define any narrowing conditional functions at
3581 present. */
3582 gcc_assert (mask_opno < 0);
3583 tree half_res = make_ssa_name (vectype_in);
3584 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3585 gimple_call_set_lhs (call, half_res);
3586 gimple_call_set_nothrow (call, true);
3587 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3588 if ((j & 1) == 0)
3590 prev_res = half_res;
3591 continue;
3593 new_temp = make_ssa_name (vec_dest);
3594 new_stmt = gimple_build_assign (new_temp, convert_code,
3595 prev_res, half_res);
3596 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3598 else
3600 gcall *call;
3601 if (ifn != IFN_LAST)
3602 call = gimple_build_call_internal_vec (ifn, vargs);
3603 else
3604 call = gimple_build_call_vec (fndecl, vargs);
3605 new_temp = make_ssa_name (vec_dest, call);
3606 gimple_call_set_lhs (call, new_temp);
3607 gimple_call_set_nothrow (call, true);
3608 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3609 new_stmt = call;
3612 if (j == (modifier == NARROW ? 1 : 0))
3613 *vec_stmt = new_stmt;
3614 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3616 for (i = 0; i < nargs; i++)
3618 vec<tree> vec_oprndsi = vec_defs[i];
3619 vec_oprndsi.release ();
3622 else if (modifier == NARROW)
3624 auto_vec<vec<tree> > vec_defs (nargs);
3625 /* We don't define any narrowing conditional functions at present. */
3626 gcc_assert (mask_opno < 0);
3627 for (j = 0; j < ncopies; ++j)
3629 /* Build argument list for the vectorized call. */
3630 if (j == 0)
3631 vargs.create (nargs * 2);
3632 else
3633 vargs.truncate (0);
3635 if (slp_node)
3637 vec<tree> vec_oprnds0;
3639 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3640 vec_oprnds0 = vec_defs[0];
3642 /* Arguments are ready. Create the new vector stmt. */
3643 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3645 size_t k;
3646 vargs.truncate (0);
3647 for (k = 0; k < nargs; k++)
3649 vec<tree> vec_oprndsk = vec_defs[k];
3650 vargs.quick_push (vec_oprndsk[i]);
3651 vargs.quick_push (vec_oprndsk[i + 1]);
3653 gcall *call;
3654 if (ifn != IFN_LAST)
3655 call = gimple_build_call_internal_vec (ifn, vargs);
3656 else
3657 call = gimple_build_call_vec (fndecl, vargs);
3658 new_temp = make_ssa_name (vec_dest, call);
3659 gimple_call_set_lhs (call, new_temp);
3660 gimple_call_set_nothrow (call, true);
3661 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3662 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3664 continue;
3667 for (i = 0; i < nargs; i++)
3669 op = gimple_call_arg (stmt, i);
3670 if (j == 0)
3672 vec_defs.quick_push (vNULL);
3673 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3674 op, &vec_defs[i], vectypes[i]);
3676 vec_oprnd0 = vec_defs[i][2*j];
3677 vec_oprnd1 = vec_defs[i][2*j+1];
3679 vargs.quick_push (vec_oprnd0);
3680 vargs.quick_push (vec_oprnd1);
3683 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3684 new_temp = make_ssa_name (vec_dest, new_stmt);
3685 gimple_call_set_lhs (new_stmt, new_temp);
3686 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3688 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3691 if (!slp_node)
3692 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3694 for (i = 0; i < nargs; i++)
3696 vec<tree> vec_oprndsi = vec_defs[i];
3697 vec_oprndsi.release ();
3700 else
3701 /* No current target implements this case. */
3702 return false;
3704 vargs.release ();
3706 /* The call in STMT might prevent it from being removed in DCE.
3707 However, we cannot remove it here, due to the way the SSA name
3708 it defines is mapped to the new definition.  So just replace the
3709 RHS of the statement with something harmless.  */
3711 if (slp_node)
3712 return true;
3714 stmt_info = vect_orig_stmt (stmt_info);
3715 lhs = gimple_get_lhs (stmt_info->stmt);
3717 gassign *new_stmt
3718 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3719 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3721 return true;
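/* As an illustration of what vectorizable_call above handles (a sketch
   with assumed names; whether it applies depends on the target): a loop
   like

     void scale (float *restrict a, const float *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = __builtin_sqrtf (b[i]);
     }

   has its scalar call recognized as a sqrt combined function.  If the
   target provides a vector square root for the chosen vectype (as an
   internal function, or via
   targetm.vectorize.builtin_vectorized_function), each group of VF
   iterations becomes a single vector call, roughly

     vect_res = .SQRT (vect_b);

   generated by the transform code above.  */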
3725 struct simd_call_arg_info
3727 tree vectype;
3728 tree op;
3729 HOST_WIDE_INT linear_step;
3730 enum vect_def_type dt;
3731 unsigned int align;
3732 bool simd_lane_linear;
3735 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3736 is linear within a simd lane (but not within the whole loop), note
3737 it in *ARGINFO. */
3739 static void
3740 vect_simd_lane_linear (tree op, class loop *loop,
3741 struct simd_call_arg_info *arginfo)
3743 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3745 if (!is_gimple_assign (def_stmt)
3746 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3747 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3748 return;
3750 tree base = gimple_assign_rhs1 (def_stmt);
3751 HOST_WIDE_INT linear_step = 0;
3752 tree v = gimple_assign_rhs2 (def_stmt);
3753 while (TREE_CODE (v) == SSA_NAME)
3755 tree t;
3756 def_stmt = SSA_NAME_DEF_STMT (v);
3757 if (is_gimple_assign (def_stmt))
3758 switch (gimple_assign_rhs_code (def_stmt))
3760 case PLUS_EXPR:
3761 t = gimple_assign_rhs2 (def_stmt);
3762 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3763 return;
3764 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3765 v = gimple_assign_rhs1 (def_stmt);
3766 continue;
3767 case MULT_EXPR:
3768 t = gimple_assign_rhs2 (def_stmt);
3769 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3770 return;
3771 linear_step = tree_to_shwi (t);
3772 v = gimple_assign_rhs1 (def_stmt);
3773 continue;
3774 CASE_CONVERT:
3775 t = gimple_assign_rhs1 (def_stmt);
3776 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3777 || (TYPE_PRECISION (TREE_TYPE (v))
3778 < TYPE_PRECISION (TREE_TYPE (t))))
3779 return;
3780 if (!linear_step)
3781 linear_step = 1;
3782 v = t;
3783 continue;
3784 default:
3785 return;
3787 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3788 && loop->simduid
3789 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3790 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3791 == loop->simduid))
3793 if (!linear_step)
3794 linear_step = 1;
3795 arginfo->linear_step = linear_step;
3796 arginfo->op = base;
3797 arginfo->simd_lane_linear = true;
3798 return;
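/* The pattern recognized above typically comes from addresses of
   simd-lane-private storage in an OpenMP simd loop: lane-private
   variables are lowered onto an array indexed by the lane number, so
   an address argument can look roughly like (names assumed)

     _1 = .GOMP_SIMD_LANE (simduid.0);
     _2 = _1 * 4;
     addr_3 = &priv_array p+ _2;

   which is linear in the simd lane with step 4 for a 4-byte element;
   the walk above recovers the base (&priv_array) and that step.  */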
3803 /* Return the number of elements in vector type VECTYPE, which is associated
3804 with a SIMD clone. At present these vectors always have a constant
3805 length. */
3807 static unsigned HOST_WIDE_INT
3808 simd_clone_subparts (tree vectype)
3810 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3813 /* Function vectorizable_simd_clone_call.
3815 Check if STMT_INFO performs a function call that can be vectorized
3816 by calling a simd clone of the function.
3817 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3818 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3819 Return true if STMT_INFO is vectorizable in this way. */
3821 static bool
3822 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3823 gimple_stmt_iterator *gsi,
3824 gimple **vec_stmt, slp_tree slp_node,
3825 stmt_vector_for_cost *)
3827 tree vec_dest;
3828 tree scalar_dest;
3829 tree op, type;
3830 tree vec_oprnd0 = NULL_TREE;
3831 tree vectype;
3832 poly_uint64 nunits;
3833 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3834 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3835 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3836 tree fndecl, new_temp;
3837 int ncopies, j;
3838 auto_vec<simd_call_arg_info> arginfo;
3839 vec<tree> vargs = vNULL;
3840 size_t i, nargs;
3841 tree lhs, rtype, ratype;
3842 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3844 /* Is STMT a vectorizable call? */
3845 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3846 if (!stmt)
3847 return false;
3849 fndecl = gimple_call_fndecl (stmt);
3850 if (fndecl == NULL_TREE)
3851 return false;
3853 struct cgraph_node *node = cgraph_node::get (fndecl);
3854 if (node == NULL || node->simd_clones == NULL)
3855 return false;
3857 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3858 return false;
3860 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3861 && ! vec_stmt)
3862 return false;
3864 if (gimple_call_lhs (stmt)
3865 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3866 return false;
3868 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3870 vectype = STMT_VINFO_VECTYPE (stmt_info);
3872 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3873 return false;
3875 /* FORNOW: SLP of calls to simd clones is not yet supported. */
3876 if (slp_node)
3877 return false;
3879 /* Process function arguments. */
3880 nargs = gimple_call_num_args (stmt);
3882 /* Bail out if the function has zero arguments. */
3883 if (nargs == 0)
3884 return false;
3886 arginfo.reserve (nargs, true);
3888 for (i = 0; i < nargs; i++)
3890 simd_call_arg_info thisarginfo;
3891 affine_iv iv;
3893 thisarginfo.linear_step = 0;
3894 thisarginfo.align = 0;
3895 thisarginfo.op = NULL_TREE;
3896 thisarginfo.simd_lane_linear = false;
3898 op = gimple_call_arg (stmt, i);
3899 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3900 &thisarginfo.vectype)
3901 || thisarginfo.dt == vect_uninitialized_def)
3903 if (dump_enabled_p ())
3904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3905 "use not simple.\n");
3906 return false;
3909 if (thisarginfo.dt == vect_constant_def
3910 || thisarginfo.dt == vect_external_def)
3911 gcc_assert (thisarginfo.vectype == NULL_TREE);
3912 else
3914 gcc_assert (thisarginfo.vectype != NULL_TREE);
3915 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3917 if (dump_enabled_p ())
3918 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3919 "vector mask arguments are not supported\n");
3920 return false;
3924 /* For linear arguments, the analyze phase should have saved
3925 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3926 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3927 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3929 gcc_assert (vec_stmt);
3930 thisarginfo.linear_step
3931 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3932 thisarginfo.op
3933 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3934 thisarginfo.simd_lane_linear
3935 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3936 == boolean_true_node);
3937 /* If the loop has been peeled for alignment, we need to adjust it. */
3938 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3939 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3940 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3942 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3943 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3944 tree opt = TREE_TYPE (thisarginfo.op);
3945 bias = fold_convert (TREE_TYPE (step), bias);
3946 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3947 thisarginfo.op
3948 = fold_build2 (POINTER_TYPE_P (opt)
3949 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3950 thisarginfo.op, bias);
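/* A worked instance of the adjustment above, with assumed numbers: if
   the loop was peeled by 3 iterations for alignment (N1 - N2 == 3) and
   the recorded linear step is 4, BIAS becomes 3 * 4 = 12 and the saved
   base is advanced by 12, so the linear argument seen by the first
   vector iteration accounts for the scalar iterations already peeled.  */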
3953 else if (!vec_stmt
3954 && thisarginfo.dt != vect_constant_def
3955 && thisarginfo.dt != vect_external_def
3956 && loop_vinfo
3957 && TREE_CODE (op) == SSA_NAME
3958 && simple_iv (loop, loop_containing_stmt (stmt), op,
3959 &iv, false)
3960 && tree_fits_shwi_p (iv.step))
3962 thisarginfo.linear_step = tree_to_shwi (iv.step);
3963 thisarginfo.op = iv.base;
3965 else if ((thisarginfo.dt == vect_constant_def
3966 || thisarginfo.dt == vect_external_def)
3967 && POINTER_TYPE_P (TREE_TYPE (op)))
3968 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3969 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3970 linear too. */
3971 if (POINTER_TYPE_P (TREE_TYPE (op))
3972 && !thisarginfo.linear_step
3973 && !vec_stmt
3974 && thisarginfo.dt != vect_constant_def
3975 && thisarginfo.dt != vect_external_def
3976 && loop_vinfo
3977 && !slp_node
3978 && TREE_CODE (op) == SSA_NAME)
3979 vect_simd_lane_linear (op, loop, &thisarginfo);
3981 arginfo.quick_push (thisarginfo);
3984 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3985 if (!vf.is_constant ())
3987 if (dump_enabled_p ())
3988 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3989 "not considering SIMD clones; not yet supported"
3990 " for variable-width vectors.\n");
3991 return false;
3994 unsigned int badness = 0;
3995 struct cgraph_node *bestn = NULL;
3996 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3997 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3998 else
3999 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4000 n = n->simdclone->next_clone)
4002 unsigned int this_badness = 0;
4003 unsigned int num_calls;
4004 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4005 || n->simdclone->nargs != nargs)
4006 continue;
4007 if (num_calls != 1)
4008 this_badness += exact_log2 (num_calls) * 4096;
4009 if (n->simdclone->inbranch)
4010 this_badness += 8192;
4011 int target_badness = targetm.simd_clone.usable (n);
4012 if (target_badness < 0)
4013 continue;
4014 this_badness += target_badness * 512;
4015 /* FORNOW: Have to add code to add the mask argument. */
4016 if (n->simdclone->inbranch)
4017 continue;
4018 for (i = 0; i < nargs; i++)
4020 switch (n->simdclone->args[i].arg_type)
4022 case SIMD_CLONE_ARG_TYPE_VECTOR:
4023 if (!useless_type_conversion_p
4024 (n->simdclone->args[i].orig_type,
4025 TREE_TYPE (gimple_call_arg (stmt, i))))
4026 i = -1;
4027 else if (arginfo[i].dt == vect_constant_def
4028 || arginfo[i].dt == vect_external_def
4029 || arginfo[i].linear_step)
4030 this_badness += 64;
4031 break;
4032 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4033 if (arginfo[i].dt != vect_constant_def
4034 && arginfo[i].dt != vect_external_def)
4035 i = -1;
4036 break;
4037 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4038 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4039 if (arginfo[i].dt == vect_constant_def
4040 || arginfo[i].dt == vect_external_def
4041 || (arginfo[i].linear_step
4042 != n->simdclone->args[i].linear_step))
4043 i = -1;
4044 break;
4045 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4046 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4047 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4048 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4049 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4050 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4051 /* FORNOW */
4052 i = -1;
4053 break;
4054 case SIMD_CLONE_ARG_TYPE_MASK:
4055 gcc_unreachable ();
4057 if (i == (size_t) -1)
4058 break;
4059 if (n->simdclone->args[i].alignment > arginfo[i].align)
4061 i = -1;
4062 break;
4064 if (arginfo[i].align)
4065 this_badness += (exact_log2 (arginfo[i].align)
4066 - exact_log2 (n->simdclone->args[i].alignment));
4068 if (i == (size_t) -1)
4069 continue;
4070 if (bestn == NULL || this_badness < badness)
4072 bestn = n;
4073 badness = this_badness;
4077 if (bestn == NULL)
4078 return false;
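/* A worked instance of the scoring above, with assumed figures: for a
   vectorization factor of 8, a clone with simdlen 8 needs num_calls == 1,
   while a clone with simdlen 4 needs num_calls == 2 and is penalized by
   exact_log2 (2) * 4096 == 4096, so the wider clone is preferred when
   both are otherwise equally usable.  */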
4080 for (i = 0; i < nargs; i++)
4081 if ((arginfo[i].dt == vect_constant_def
4082 || arginfo[i].dt == vect_external_def)
4083 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4085 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4086 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4087 slp_node);
4088 if (arginfo[i].vectype == NULL
4089 || !constant_multiple_p (bestn->simdclone->simdlen,
4090 simd_clone_subparts (arginfo[i].vectype)))
4091 return false;
4094 fndecl = bestn->decl;
4095 nunits = bestn->simdclone->simdlen;
4096 ncopies = vector_unroll_factor (vf, nunits);
4098 /* If the function isn't const, only allow it in simd loops where the user
4099 has asserted that at least nunits consecutive iterations can be
4100 performed using SIMD instructions. */
4101 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4102 && gimple_vuse (stmt))
4103 return false;
4105 /* Sanity check: make sure that at least one copy of the vectorized stmt
4106 needs to be generated. */
4107 gcc_assert (ncopies >= 1);
4109 if (!vec_stmt) /* transformation not required. */
4111 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4112 for (i = 0; i < nargs; i++)
4113 if ((bestn->simdclone->args[i].arg_type
4114 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4115 || (bestn->simdclone->args[i].arg_type
4116 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4118 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4119 + 1,
4120 true);
4121 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4122 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4123 ? size_type_node : TREE_TYPE (arginfo[i].op);
4124 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4125 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4126 tree sll = arginfo[i].simd_lane_linear
4127 ? boolean_true_node : boolean_false_node;
4128 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4130 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4131 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4132 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4133 dt, slp_node, cost_vec); */
4134 return true;
4137 /* Transform. */
4139 if (dump_enabled_p ())
4140 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4142 /* Handle def. */
4143 scalar_dest = gimple_call_lhs (stmt);
4144 vec_dest = NULL_TREE;
4145 rtype = NULL_TREE;
4146 ratype = NULL_TREE;
4147 if (scalar_dest)
4149 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4150 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4151 if (TREE_CODE (rtype) == ARRAY_TYPE)
4153 ratype = rtype;
4154 rtype = TREE_TYPE (ratype);
4158 auto_vec<vec<tree> > vec_oprnds;
4159 auto_vec<unsigned> vec_oprnds_i;
4160 vec_oprnds.safe_grow_cleared (nargs, true);
4161 vec_oprnds_i.safe_grow_cleared (nargs, true);
4162 for (j = 0; j < ncopies; ++j)
4164 /* Build argument list for the vectorized call. */
4165 if (j == 0)
4166 vargs.create (nargs);
4167 else
4168 vargs.truncate (0);
4170 for (i = 0; i < nargs; i++)
4172 unsigned int k, l, m, o;
4173 tree atype;
4174 op = gimple_call_arg (stmt, i);
4175 switch (bestn->simdclone->args[i].arg_type)
4177 case SIMD_CLONE_ARG_TYPE_VECTOR:
4178 atype = bestn->simdclone->args[i].vector_type;
4179 o = vector_unroll_factor (nunits,
4180 simd_clone_subparts (atype));
4181 for (m = j * o; m < (j + 1) * o; m++)
4183 if (simd_clone_subparts (atype)
4184 < simd_clone_subparts (arginfo[i].vectype))
4186 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4187 k = (simd_clone_subparts (arginfo[i].vectype)
4188 / simd_clone_subparts (atype));
4189 gcc_assert ((k & (k - 1)) == 0);
4190 if (m == 0)
4192 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4193 ncopies * o / k, op,
4194 &vec_oprnds[i]);
4195 vec_oprnds_i[i] = 0;
4196 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4198 else
4200 vec_oprnd0 = arginfo[i].op;
4201 if ((m & (k - 1)) == 0)
4202 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4204 arginfo[i].op = vec_oprnd0;
4205 vec_oprnd0
4206 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4207 bitsize_int (prec),
4208 bitsize_int ((m & (k - 1)) * prec));
4209 gassign *new_stmt
4210 = gimple_build_assign (make_ssa_name (atype),
4211 vec_oprnd0);
4212 vect_finish_stmt_generation (vinfo, stmt_info,
4213 new_stmt, gsi);
4214 vargs.safe_push (gimple_assign_lhs (new_stmt));
4216 else
4218 k = (simd_clone_subparts (atype)
4219 / simd_clone_subparts (arginfo[i].vectype));
4220 gcc_assert ((k & (k - 1)) == 0);
4221 vec<constructor_elt, va_gc> *ctor_elts;
4222 if (k != 1)
4223 vec_alloc (ctor_elts, k);
4224 else
4225 ctor_elts = NULL;
4226 for (l = 0; l < k; l++)
4228 if (m == 0 && l == 0)
4230 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4231 k * o * ncopies,
4232 op,
4233 &vec_oprnds[i]);
4234 vec_oprnds_i[i] = 0;
4235 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4237 else
4238 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4239 arginfo[i].op = vec_oprnd0;
4240 if (k == 1)
4241 break;
4242 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4243 vec_oprnd0);
4245 if (k == 1)
4246 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4247 atype))
4249 vec_oprnd0
4250 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4251 gassign *new_stmt
4252 = gimple_build_assign (make_ssa_name (atype),
4253 vec_oprnd0);
4254 vect_finish_stmt_generation (vinfo, stmt_info,
4255 new_stmt, gsi);
4256 vargs.safe_push (gimple_assign_lhs (new_stmt));
4258 else
4259 vargs.safe_push (vec_oprnd0);
4260 else
4262 vec_oprnd0 = build_constructor (atype, ctor_elts);
4263 gassign *new_stmt
4264 = gimple_build_assign (make_ssa_name (atype),
4265 vec_oprnd0);
4266 vect_finish_stmt_generation (vinfo, stmt_info,
4267 new_stmt, gsi);
4268 vargs.safe_push (gimple_assign_lhs (new_stmt));
4272 break;
4273 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4274 vargs.safe_push (op);
4275 break;
4276 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4277 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4278 if (j == 0)
4280 gimple_seq stmts;
4281 arginfo[i].op
4282 = force_gimple_operand (unshare_expr (arginfo[i].op),
4283 &stmts, true, NULL_TREE);
4284 if (stmts != NULL)
4286 basic_block new_bb;
4287 edge pe = loop_preheader_edge (loop);
4288 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4289 gcc_assert (!new_bb);
4291 if (arginfo[i].simd_lane_linear)
4293 vargs.safe_push (arginfo[i].op);
4294 break;
4296 tree phi_res = copy_ssa_name (op);
4297 gphi *new_phi = create_phi_node (phi_res, loop->header);
4298 add_phi_arg (new_phi, arginfo[i].op,
4299 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4300 enum tree_code code
4301 = POINTER_TYPE_P (TREE_TYPE (op))
4302 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4303 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4304 ? sizetype : TREE_TYPE (op);
4305 poly_widest_int cst
4306 = wi::mul (bestn->simdclone->args[i].linear_step,
4307 ncopies * nunits);
4308 tree tcst = wide_int_to_tree (type, cst);
4309 tree phi_arg = copy_ssa_name (op);
4310 gassign *new_stmt
4311 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4312 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4313 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4314 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4315 UNKNOWN_LOCATION);
4316 arginfo[i].op = phi_res;
4317 vargs.safe_push (phi_res);
4319 else
4321 enum tree_code code
4322 = POINTER_TYPE_P (TREE_TYPE (op))
4323 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4324 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4325 ? sizetype : TREE_TYPE (op);
4326 poly_widest_int cst
4327 = wi::mul (bestn->simdclone->args[i].linear_step,
4328 j * nunits);
4329 tree tcst = wide_int_to_tree (type, cst);
4330 new_temp = make_ssa_name (TREE_TYPE (op));
4331 gassign *new_stmt
4332 = gimple_build_assign (new_temp, code,
4333 arginfo[i].op, tcst);
4334 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4335 vargs.safe_push (new_temp);
4337 break;
4338 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4339 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4340 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4341 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4342 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4343 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4344 default:
4345 gcc_unreachable ();
4349 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4350 if (vec_dest)
4352 gcc_assert (ratype
4353 || known_eq (simd_clone_subparts (rtype), nunits));
4354 if (ratype)
4355 new_temp = create_tmp_var (ratype);
4356 else if (useless_type_conversion_p (vectype, rtype))
4357 new_temp = make_ssa_name (vec_dest, new_call);
4358 else
4359 new_temp = make_ssa_name (rtype, new_call);
4360 gimple_call_set_lhs (new_call, new_temp);
4362 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4363 gimple *new_stmt = new_call;
4365 if (vec_dest)
4367 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4369 unsigned int k, l;
4370 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4371 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4372 k = vector_unroll_factor (nunits,
4373 simd_clone_subparts (vectype));
4374 gcc_assert ((k & (k - 1)) == 0);
4375 for (l = 0; l < k; l++)
4377 tree t;
4378 if (ratype)
4380 t = build_fold_addr_expr (new_temp);
4381 t = build2 (MEM_REF, vectype, t,
4382 build_int_cst (TREE_TYPE (t), l * bytes));
4384 else
4385 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4386 bitsize_int (prec), bitsize_int (l * prec));
4387 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4388 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4390 if (j == 0 && l == 0)
4391 *vec_stmt = new_stmt;
4392 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4395 if (ratype)
4396 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4397 continue;
4399 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4401 unsigned int k = (simd_clone_subparts (vectype)
4402 / simd_clone_subparts (rtype));
4403 gcc_assert ((k & (k - 1)) == 0);
4404 if ((j & (k - 1)) == 0)
4405 vec_alloc (ret_ctor_elts, k);
4406 if (ratype)
4408 unsigned int m, o;
4409 o = vector_unroll_factor (nunits,
4410 simd_clone_subparts (rtype));
4411 for (m = 0; m < o; m++)
4413 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4414 size_int (m), NULL_TREE, NULL_TREE);
4415 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4416 tem);
4417 vect_finish_stmt_generation (vinfo, stmt_info,
4418 new_stmt, gsi);
4419 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4420 gimple_assign_lhs (new_stmt));
4422 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4424 else
4425 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4426 if ((j & (k - 1)) != k - 1)
4427 continue;
4428 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4429 new_stmt
4430 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4431 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4433 if ((unsigned) j == k - 1)
4434 *vec_stmt = new_stmt;
4435 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4436 continue;
4438 else if (ratype)
4440 tree t = build_fold_addr_expr (new_temp);
4441 t = build2 (MEM_REF, vectype, t,
4442 build_int_cst (TREE_TYPE (t), 0));
4443 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4444 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4445 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4447 else if (!useless_type_conversion_p (vectype, rtype))
4449 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4450 new_stmt
4451 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4452 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4456 if (j == 0)
4457 *vec_stmt = new_stmt;
4458 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4461 for (i = 0; i < nargs; ++i)
4463 vec<tree> oprndsi = vec_oprnds[i];
4464 oprndsi.release ();
4466 vargs.release ();
4468 /* The call in STMT might prevent it from being removed in DCE.
4469 However, we cannot remove it here, due to the way the SSA name
4470 it defines is mapped to the new definition.  So just replace the
4471 RHS of the statement with something harmless.  */
4473 if (slp_node)
4474 return true;
4476 gimple *new_stmt;
4477 if (scalar_dest)
4479 type = TREE_TYPE (scalar_dest);
4480 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4481 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4483 else
4484 new_stmt = gimple_build_nop ();
4485 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4486 unlink_stmt_vdef (stmt);
4488 return true;
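/* As an illustration of the simd clone case handled above (a sketch
   with assumed declarations): given

     #pragma omp declare simd notinbranch
     float foo (float x);

     void bar (float *restrict a, const float *restrict b, int n)
     {
       #pragma omp simd
       for (int i = 0; i < n; i++)
         a[i] = foo (b[i]);
     }

   and a usable clone of, say, simdlen 4, the scalar call is replaced by
   a call to that clone taking a whole vector of arguments and producing
   a vector of results.  The clone's name, simdlen and calling convention
   come from the target's vector ABI and are not spelled out here.  */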
4492 /* Function vect_gen_widened_results_half
4494 Create a vector stmt whose code, type, number of arguments, and result
4495 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4496 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4497 (CODE is always an expression code here: the function builds a plain
4498 assignment, so no separate call to a target builtin is created.)
4499 STMT_INFO is the original scalar stmt that we are vectorizing. */
4501 static gimple *
4502 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4503 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4504 tree vec_dest, gimple_stmt_iterator *gsi,
4505 stmt_vec_info stmt_info)
4507 gimple *new_stmt;
4508 tree new_temp;
4510 /* Generate half of the widened result: */
4511 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4512 if (op_type != binary_op)
4513 vec_oprnd1 = NULL;
4514 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4515 new_temp = make_ssa_name (vec_dest, new_stmt);
4516 gimple_assign_set_lhs (new_stmt, new_temp);
4517 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4519 return new_stmt;
4523 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4524 For multi-step conversions store the resulting vectors and call the function
4525 recursively. */
4527 static void
4528 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4529 int multi_step_cvt,
4530 stmt_vec_info stmt_info,
4531 vec<tree> &vec_dsts,
4532 gimple_stmt_iterator *gsi,
4533 slp_tree slp_node, enum tree_code code)
4535 unsigned int i;
4536 tree vop0, vop1, new_tmp, vec_dest;
4538 vec_dest = vec_dsts.pop ();
4540 for (i = 0; i < vec_oprnds->length (); i += 2)
4542 /* Create demotion operation. */
4543 vop0 = (*vec_oprnds)[i];
4544 vop1 = (*vec_oprnds)[i + 1];
4545 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4546 new_tmp = make_ssa_name (vec_dest, new_stmt);
4547 gimple_assign_set_lhs (new_stmt, new_tmp);
4548 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4550 if (multi_step_cvt)
4551 /* Store the resulting vector for next recursive call. */
4552 (*vec_oprnds)[i/2] = new_tmp;
4553 else
4555 /* This is the last step of the conversion sequence. Store the
4556 vectors in SLP_NODE or in the vector info of the scalar statement
4557 (or in STMT_VINFO_RELATED_STMT chain). */
4558 if (slp_node)
4559 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4560 else
4561 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4565 /* For multi-step demotion operations we first generate demotion operations
4566 from the source type to the intermediate types, and then combine the
4567 results (stored in VEC_OPRNDS) in demotion operation to the destination
4568 type. */
4569 if (multi_step_cvt)
4571 /* At each level of recursion we have half of the operands we had at the
4572 previous level. */
4573 vec_oprnds->truncate ((i+1)/2);
4574 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4575 multi_step_cvt - 1,
4576 stmt_info, vec_dsts, gsi,
4577 slp_node, VEC_PACK_TRUNC_EXPR);
4580 vec_dsts.quick_push (vec_dest);
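/* Illustration (modes assumed): in a single-step demotion each pair of
   input vectors is packed into one output vector, e.g. two V4SI operands
   become one V8HI via VEC_PACK_TRUNC_EXPR.  With MULTI_STEP_CVT == 1 the
   function recurses once, so four V4SI inputs are first packed into two
   V8HI vectors and those into one V16QI result.  */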
4584 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4585 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4586 STMT_INFO. For multi-step conversions store the resulting vectors and
4587 call the function recursively. */
4589 static void
4590 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4591 vec<tree> *vec_oprnds0,
4592 vec<tree> *vec_oprnds1,
4593 stmt_vec_info stmt_info, tree vec_dest,
4594 gimple_stmt_iterator *gsi,
4595 enum tree_code code1,
4596 enum tree_code code2, int op_type)
4598 int i;
4599 tree vop0, vop1, new_tmp1, new_tmp2;
4600 gimple *new_stmt1, *new_stmt2;
4601 vec<tree> vec_tmp = vNULL;
4603 vec_tmp.create (vec_oprnds0->length () * 2);
4604 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4606 if (op_type == binary_op)
4607 vop1 = (*vec_oprnds1)[i];
4608 else
4609 vop1 = NULL_TREE;
4611 /* Generate the two halves of promotion operation. */
4612 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4613 op_type, vec_dest, gsi,
4614 stmt_info);
4615 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4616 op_type, vec_dest, gsi,
4617 stmt_info);
4618 if (is_gimple_call (new_stmt1))
4620 new_tmp1 = gimple_call_lhs (new_stmt1);
4621 new_tmp2 = gimple_call_lhs (new_stmt2);
4623 else
4625 new_tmp1 = gimple_assign_lhs (new_stmt1);
4626 new_tmp2 = gimple_assign_lhs (new_stmt2);
4629 /* Store the results for the next step. */
4630 vec_tmp.quick_push (new_tmp1);
4631 vec_tmp.quick_push (new_tmp2);
4634 vec_oprnds0->release ();
4635 *vec_oprnds0 = vec_tmp;
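/* Illustration (modes assumed): each input vector yields two output
   vectors here, the low and high halves of the widened result.  For a
   V8HI operand widened to SImode elements, CODE1/CODE2 would typically
   be VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR, producing two V4SI
   vectors that are stored back into VEC_OPRNDS0 for any further step.  */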
4638 /* Create vectorized promotion stmts for widening stmts using only half the
4639 potential vector size for input. */
4640 static void
4641 vect_create_half_widening_stmts (vec_info *vinfo,
4642 vec<tree> *vec_oprnds0,
4643 vec<tree> *vec_oprnds1,
4644 stmt_vec_info stmt_info, tree vec_dest,
4645 gimple_stmt_iterator *gsi,
4646 enum tree_code code1,
4647 int op_type)
4649 int i;
4650 tree vop0, vop1;
4651 gimple *new_stmt1;
4652 gimple *new_stmt2;
4653 gimple *new_stmt3;
4654 vec<tree> vec_tmp = vNULL;
4656 vec_tmp.create (vec_oprnds0->length ());
4657 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4659 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4661 gcc_assert (op_type == binary_op);
4662 vop1 = (*vec_oprnds1)[i];
4664 /* Widen the first vector input. */
4665 out_type = TREE_TYPE (vec_dest);
4666 new_tmp1 = make_ssa_name (out_type);
4667 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4668 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4669 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4671 /* Widen the second vector input. */
4672 new_tmp2 = make_ssa_name (out_type);
4673 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4674 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4675 /* Perform the operation, with both vector inputs widened. */
4676 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4678 else
4680 /* Perform the operation, with the single vector input widened. */
4681 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4684 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4685 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4686 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4688 /* Store the results for the next step. */
4689 vec_tmp.quick_push (new_tmp3);
4692 vec_oprnds0->release ();
4693 *vec_oprnds0 = vec_tmp;
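/* Illustration (modes assumed): this path is taken when input and output
   vectors have the same number of units, e.g. a WIDEN_PLUS_EXPR with
   V4HI operands and a V4SI result.  Each V4HI operand is first converted
   element-wise to V4SI with a NOP_EXPR and the addition is then an
   ordinary V4SI PLUS_EXPR, so only half of a full-width input vector is
   consumed per statement.  */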
4697 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4698 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4699 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4700 Return true if STMT_INFO is vectorizable in this way. */
4702 static bool
4703 vectorizable_conversion (vec_info *vinfo,
4704 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4705 gimple **vec_stmt, slp_tree slp_node,
4706 stmt_vector_for_cost *cost_vec)
4708 tree vec_dest;
4709 tree scalar_dest;
4710 tree op0, op1 = NULL_TREE;
4711 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4712 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4713 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4714 tree new_temp;
4715 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4716 int ndts = 2;
4717 poly_uint64 nunits_in;
4718 poly_uint64 nunits_out;
4719 tree vectype_out, vectype_in;
4720 int ncopies, i;
4721 tree lhs_type, rhs_type;
4722 enum { NARROW, NONE, WIDEN } modifier;
4723 vec<tree> vec_oprnds0 = vNULL;
4724 vec<tree> vec_oprnds1 = vNULL;
4725 tree vop0;
4726 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4727 int multi_step_cvt = 0;
4728 vec<tree> interm_types = vNULL;
4729 tree intermediate_type, cvt_type = NULL_TREE;
4730 int op_type;
4731 unsigned short fltsz;
4733 /* Is STMT a vectorizable conversion? */
4735 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4736 return false;
4738 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4739 && ! vec_stmt)
4740 return false;
4742 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4743 if (!stmt)
4744 return false;
4746 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4747 return false;
4749 code = gimple_assign_rhs_code (stmt);
4750 if (!CONVERT_EXPR_CODE_P (code)
4751 && code != FIX_TRUNC_EXPR
4752 && code != FLOAT_EXPR
4753 && code != WIDEN_PLUS_EXPR
4754 && code != WIDEN_MINUS_EXPR
4755 && code != WIDEN_MULT_EXPR
4756 && code != WIDEN_LSHIFT_EXPR)
4757 return false;
4759 bool widen_arith = (code == WIDEN_PLUS_EXPR
4760 || code == WIDEN_MINUS_EXPR
4761 || code == WIDEN_MULT_EXPR
4762 || code == WIDEN_LSHIFT_EXPR);
4763 op_type = TREE_CODE_LENGTH (code);
4765 /* Check types of lhs and rhs. */
4766 scalar_dest = gimple_assign_lhs (stmt);
4767 lhs_type = TREE_TYPE (scalar_dest);
4768 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4770 /* Check the operands of the operation. */
4771 slp_tree slp_op0, slp_op1 = NULL;
4772 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4773 0, &op0, &slp_op0, &dt[0], &vectype_in))
4775 if (dump_enabled_p ())
4776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4777 "use not simple.\n");
4778 return false;
4781 rhs_type = TREE_TYPE (op0);
4782 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4783 && !((INTEGRAL_TYPE_P (lhs_type)
4784 && INTEGRAL_TYPE_P (rhs_type))
4785 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4786 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4787 return false;
4789 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4790 && ((INTEGRAL_TYPE_P (lhs_type)
4791 && !type_has_mode_precision_p (lhs_type))
4792 || (INTEGRAL_TYPE_P (rhs_type)
4793 && !type_has_mode_precision_p (rhs_type))))
4795 if (dump_enabled_p ())
4796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4797 "type conversion to/from bit-precision unsupported."
4798 "\n");
4799 return false;
4802 if (op_type == binary_op)
4804 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4805 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4807 op1 = gimple_assign_rhs2 (stmt);
4808 tree vectype1_in;
4809 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4810 &op1, &slp_op1, &dt[1], &vectype1_in))
4812 if (dump_enabled_p ())
4813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4814 "use not simple.\n");
4815 return false;
4817 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4818 OP1. */
4819 if (!vectype_in)
4820 vectype_in = vectype1_in;
4823 /* If op0 is an external or constant def, infer the vector type
4824 from the scalar type. */
4825 if (!vectype_in)
4826 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4827 if (vec_stmt)
4828 gcc_assert (vectype_in);
4829 if (!vectype_in)
4831 if (dump_enabled_p ())
4832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4833 "no vectype for scalar type %T\n", rhs_type);
4835 return false;
4838 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4839 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4841 if (dump_enabled_p ())
4842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4843 "can't convert between boolean and non "
4844 "boolean vectors %T\n", rhs_type);
4846 return false;
4849 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4850 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4851 if (known_eq (nunits_out, nunits_in))
4852 if (widen_arith)
4853 modifier = WIDEN;
4854 else
4855 modifier = NONE;
4856 else if (multiple_p (nunits_out, nunits_in))
4857 modifier = NARROW;
4858 else
4860 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4861 modifier = WIDEN;
4864 /* Multiple types in SLP are handled by creating the appropriate number of
4865 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4866 case of SLP. */
4867 if (slp_node)
4868 ncopies = 1;
4869 else if (modifier == NARROW)
4870 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4871 else
4872 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4874 /* Sanity check: make sure that at least one copy of the vectorized stmt
4875 needs to be generated. */
4876 gcc_assert (ncopies >= 1);
4878 bool found_mode = false;
4879 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4880 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4881 opt_scalar_mode rhs_mode_iter;
4883 /* Supportable by target? */
4884 switch (modifier)
4886 case NONE:
4887 if (code != FIX_TRUNC_EXPR
4888 && code != FLOAT_EXPR
4889 && !CONVERT_EXPR_CODE_P (code))
4890 return false;
4891 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4892 break;
4893 /* FALLTHRU */
4894 unsupported:
4895 if (dump_enabled_p ())
4896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4897 "conversion not supported by target.\n");
4898 return false;
4900 case WIDEN:
4901 if (known_eq (nunits_in, nunits_out))
4903 if (!supportable_half_widening_operation (code, vectype_out,
4904 vectype_in, &code1))
4905 goto unsupported;
4906 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4907 break;
4909 if (supportable_widening_operation (vinfo, code, stmt_info,
4910 vectype_out, vectype_in, &code1,
4911 &code2, &multi_step_cvt,
4912 &interm_types))
4914 /* Binary widening operation can only be supported directly by the
4915 architecture. */
4916 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4917 break;
4920 if (code != FLOAT_EXPR
4921 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4922 goto unsupported;
4924 fltsz = GET_MODE_SIZE (lhs_mode);
4925 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4927 rhs_mode = rhs_mode_iter.require ();
4928 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4929 break;
4931 cvt_type
4932 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4933 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4934 if (cvt_type == NULL_TREE)
4935 goto unsupported;
4937 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4939 if (!supportable_convert_operation (code, vectype_out,
4940 cvt_type, &codecvt1))
4941 goto unsupported;
4943 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4944 vectype_out, cvt_type,
4945 &codecvt1, &codecvt2,
4946 &multi_step_cvt,
4947 &interm_types))
4948 continue;
4949 else
4950 gcc_assert (multi_step_cvt == 0);
4952 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4953 cvt_type,
4954 vectype_in, &code1, &code2,
4955 &multi_step_cvt, &interm_types))
4957 found_mode = true;
4958 break;
4962 if (!found_mode)
4963 goto unsupported;
4965 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4966 codecvt2 = ERROR_MARK;
4967 else
4969 multi_step_cvt++;
4970 interm_types.safe_push (cvt_type);
4971 cvt_type = NULL_TREE;
4973 break;
4975 case NARROW:
4976 gcc_assert (op_type == unary_op);
4977 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4978 &code1, &multi_step_cvt,
4979 &interm_types))
4980 break;
4982 if (code != FIX_TRUNC_EXPR
4983 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4984 goto unsupported;
4986 cvt_type
4987 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4988 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4989 if (cvt_type == NULL_TREE)
4990 goto unsupported;
4991 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4992 &codecvt1))
4993 goto unsupported;
4994 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4995 &code1, &multi_step_cvt,
4996 &interm_types))
4997 break;
4998 goto unsupported;
5000 default:
5001 gcc_unreachable ();
5004 if (!vec_stmt) /* transformation not required. */
5006 if (slp_node
5007 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5008 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5010 if (dump_enabled_p ())
5011 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5012 "incompatible vector types for invariants\n");
5013 return false;
5015 DUMP_VECT_SCOPE ("vectorizable_conversion");
5016 if (modifier == NONE)
5018 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5019 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5020 cost_vec);
5022 else if (modifier == NARROW)
5024 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5025 /* The final packing step produces one vector result per copy. */
5026 unsigned int nvectors
5027 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5028 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5029 multi_step_cvt, cost_vec,
5030 widen_arith);
5032 else
5034 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5035 /* The initial unpacking step produces two vector results
5036 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5037 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5038 unsigned int nvectors
5039 = (slp_node
5040 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5041 : ncopies * 2);
5042 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5043 multi_step_cvt, cost_vec,
5044 widen_arith);
5046 interm_types.release ();
5047 return true;
5050 /* Transform. */
5051 if (dump_enabled_p ())
5052 dump_printf_loc (MSG_NOTE, vect_location,
5053 "transform conversion. ncopies = %d.\n", ncopies);
5055 if (op_type == binary_op)
5057 if (CONSTANT_CLASS_P (op0))
5058 op0 = fold_convert (TREE_TYPE (op1), op0);
5059 else if (CONSTANT_CLASS_P (op1))
5060 op1 = fold_convert (TREE_TYPE (op0), op1);
5063 /* In case of multi-step conversion, we first generate conversion operations
5064 to the intermediate types, and then from those types to the final one.
5065 We create vector destinations for the intermediate type (TYPES) received
5066 from supportable_*_operation, and store them in the correct order
5067 for future use in vect_create_vectorized_*_stmts (). */
5068 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5069 vec_dest = vect_create_destination_var (scalar_dest,
5070 (cvt_type && modifier == WIDEN)
5071 ? cvt_type : vectype_out);
5072 vec_dsts.quick_push (vec_dest);
5074 if (multi_step_cvt)
5076 for (i = interm_types.length () - 1;
5077 interm_types.iterate (i, &intermediate_type); i--)
5079 vec_dest = vect_create_destination_var (scalar_dest,
5080 intermediate_type);
5081 vec_dsts.quick_push (vec_dest);
5085 if (cvt_type)
5086 vec_dest = vect_create_destination_var (scalar_dest,
5087 modifier == WIDEN
5088 ? vectype_out : cvt_type);
5090 int ninputs = 1;
5091 if (!slp_node)
5093 if (modifier == WIDEN)
5095 else if (modifier == NARROW)
5097 if (multi_step_cvt)
5098 ninputs = vect_pow2 (multi_step_cvt);
5099 ninputs *= 2;
5103 switch (modifier)
5105 case NONE:
5106 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5107 op0, &vec_oprnds0);
5108 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5110 /* Arguments are ready, create the new vector stmt. */
5111 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5112 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5113 new_temp = make_ssa_name (vec_dest, new_stmt);
5114 gimple_assign_set_lhs (new_stmt, new_temp);
5115 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5117 if (slp_node)
5118 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5119 else
5120 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5122 break;
5124 case WIDEN:
5125 /* In case the vectorization factor (VF) is bigger than the number
5126 of elements that we can fit in a vectype (nunits), we have to
5127 generate more than one vector stmt, i.e. we need to "unroll"
5128 the vector stmt by a factor VF/nunits. */
5129 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5130 op0, &vec_oprnds0,
5131 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5132 &vec_oprnds1);
5133 if (code == WIDEN_LSHIFT_EXPR)
5135 int oprnds_size = vec_oprnds0.length ();
5136 vec_oprnds1.create (oprnds_size);
5137 for (i = 0; i < oprnds_size; ++i)
5138 vec_oprnds1.quick_push (op1);
5140 /* Arguments are ready. Create the new vector stmts. */
5141 for (i = multi_step_cvt; i >= 0; i--)
5143 tree this_dest = vec_dsts[i];
5144 enum tree_code c1 = code1, c2 = code2;
5145 if (i == 0 && codecvt2 != ERROR_MARK)
5147 c1 = codecvt1;
5148 c2 = codecvt2;
5150 if (known_eq (nunits_out, nunits_in))
5151 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5152 &vec_oprnds1, stmt_info,
5153 this_dest, gsi,
5154 c1, op_type);
5155 else
5156 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5157 &vec_oprnds1, stmt_info,
5158 this_dest, gsi,
5159 c1, c2, op_type);
5162 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5164 gimple *new_stmt;
5165 if (cvt_type)
5167 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5168 new_temp = make_ssa_name (vec_dest);
5169 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5170 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5172 else
5173 new_stmt = SSA_NAME_DEF_STMT (vop0);
5175 if (slp_node)
5176 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5177 else
5178 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5180 break;
5182 case NARROW:
5183 /* In case the vectorization factor (VF) is bigger than the number
5184 of elements that we can fit in a vectype (nunits), we have to
5186 generate more than one vector stmt, i.e. we need to "unroll"
5186 the vector stmt by a factor VF/nunits. */
5187 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5188 op0, &vec_oprnds0);
5189 /* Arguments are ready. Create the new vector stmts. */
5190 if (cvt_type)
5191 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5193 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5194 new_temp = make_ssa_name (vec_dest);
5195 gassign *new_stmt
5196 = gimple_build_assign (new_temp, codecvt1, vop0);
5197 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5198 vec_oprnds0[i] = new_temp;
5201 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5202 multi_step_cvt,
5203 stmt_info, vec_dsts, gsi,
5204 slp_node, code1);
5205 break;
5207 if (!slp_node)
5208 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5210 vec_oprnds0.release ();
5211 vec_oprnds1.release ();
5212 interm_types.release ();
5214 return true;
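/* As an illustration of a two-stage widening conversion (a sketch with
   assumed modes; targets with a direct widening float conversion handle
   it in one step):

     void widen (float *restrict out, const short *restrict in, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = in[i];
     }

   The scalar operation is FLOAT_EXPR from HImode to SFmode.  In the
   WIDEN case the code above looks for an intermediate integer mode of
   the result's width: with 128-bit vectors the V8HI input is first
   unpacked into two V4SI vectors (CODE1/CODE2) and each V4SI is then
   converted to V4SF by CODECVT1.  */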
5217 /* Return true if we can assume from the scalar form of STMT_INFO that
5218 neither the scalar nor the vector forms will generate code. STMT_INFO
5219 is known not to involve a data reference. */
5221 bool
5222 vect_nop_conversion_p (stmt_vec_info stmt_info)
5224 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5225 if (!stmt)
5226 return false;
5228 tree lhs = gimple_assign_lhs (stmt);
5229 tree_code code = gimple_assign_rhs_code (stmt);
5230 tree rhs = gimple_assign_rhs1 (stmt);
5232 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5233 return true;
5235 if (CONVERT_EXPR_CODE_P (code))
5236 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5238 return false;
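/* For example, a cast between int and unsigned int, or a
   VIEW_CONVERT_EXPR between vectors of the same mode, satisfies the
   predicate above: neither the scalar nor the vector form generates any
   code, so vectorizable_assignment below does not cost such a copy.  */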
5241 /* Function vectorizable_assignment.
5243 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5244 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5245 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5246 Return true if STMT_INFO is vectorizable in this way. */
5248 static bool
5249 vectorizable_assignment (vec_info *vinfo,
5250 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5251 gimple **vec_stmt, slp_tree slp_node,
5252 stmt_vector_for_cost *cost_vec)
5254 tree vec_dest;
5255 tree scalar_dest;
5256 tree op;
5257 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5258 tree new_temp;
5259 enum vect_def_type dt[1] = {vect_unknown_def_type};
5260 int ndts = 1;
5261 int ncopies;
5262 int i;
5263 vec<tree> vec_oprnds = vNULL;
5264 tree vop;
5265 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5266 enum tree_code code;
5267 tree vectype_in;
5269 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5270 return false;
5272 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5273 && ! vec_stmt)
5274 return false;
5276 /* Is vectorizable assignment? */
5277 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5278 if (!stmt)
5279 return false;
5281 scalar_dest = gimple_assign_lhs (stmt);
5282 if (TREE_CODE (scalar_dest) != SSA_NAME)
5283 return false;
5285 if (STMT_VINFO_DATA_REF (stmt_info))
5286 return false;
5288 code = gimple_assign_rhs_code (stmt);
5289 if (!(gimple_assign_single_p (stmt)
5290 || code == PAREN_EXPR
5291 || CONVERT_EXPR_CODE_P (code)))
5292 return false;
5294 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5295 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5297 /* Multiple types in SLP are handled by creating the appropriate number of
5298 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5299 case of SLP. */
5300 if (slp_node)
5301 ncopies = 1;
5302 else
5303 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5305 gcc_assert (ncopies >= 1);
5307 slp_tree slp_op;
5308 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5309 &dt[0], &vectype_in))
5311 if (dump_enabled_p ())
5312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5313 "use not simple.\n");
5314 return false;
5316 if (!vectype_in)
5317 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5319 /* We can handle NOP_EXPR conversions that do not change the number
5320 of elements or the vector size. */
5321 if ((CONVERT_EXPR_CODE_P (code)
5322 || code == VIEW_CONVERT_EXPR)
5323 && (!vectype_in
5324 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5325 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5326 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5327 return false;
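  /* For example (illustrative only), with int a[] and unsigned int b[],
     a copy like

       b[i] = (unsigned int) a[i];

     passes the check above: vector(4) int and vector(4) unsigned int have
     the same number of elements and the same mode size, so the cast is
     vectorized below as a simple VIEW_CONVERT_EXPR of the vector
     operand.  */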
5329 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5330 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5332 if (dump_enabled_p ())
5333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5334 "can't convert between boolean and non "
5335 "boolean vectors %T\n", TREE_TYPE (op));
5337 return false;
5340 /* We do not handle bit-precision changes. */
5341 if ((CONVERT_EXPR_CODE_P (code)
5342 || code == VIEW_CONVERT_EXPR)
5343 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5344 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5345 || !type_has_mode_precision_p (TREE_TYPE (op)))
5346 /* But a conversion that does not change the bit-pattern is ok. */
5347 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5348 > TYPE_PRECISION (TREE_TYPE (op)))
5349 && TYPE_UNSIGNED (TREE_TYPE (op))))
5351 if (dump_enabled_p ())
5352 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5353 "type conversion to/from bit-precision "
5354 "unsupported.\n");
5355 return false;
5358 if (!vec_stmt) /* transformation not required. */
5360 if (slp_node
5361 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5363 if (dump_enabled_p ())
5364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5365 "incompatible vector types for invariants\n");
5366 return false;
5368 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5369 DUMP_VECT_SCOPE ("vectorizable_assignment");
5370 if (!vect_nop_conversion_p (stmt_info))
5371 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5372 cost_vec);
5373 return true;
5376 /* Transform. */
5377 if (dump_enabled_p ())
5378 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5380 /* Handle def. */
5381 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5383 /* Handle use. */
5384 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5386	  /* Arguments are ready.  Create the new vector stmt.  */
5387 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5389 if (CONVERT_EXPR_CODE_P (code)
5390 || code == VIEW_CONVERT_EXPR)
5391 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5392 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5393 new_temp = make_ssa_name (vec_dest, new_stmt);
5394 gimple_assign_set_lhs (new_stmt, new_temp);
5395 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5396 if (slp_node)
5397 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5398 else
5399 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5401 if (!slp_node)
5402 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5404 vec_oprnds.release ();
5405 return true;
5409 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5410 either as shift by a scalar or by a vector. */
5412 bool
5413 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5416 machine_mode vec_mode;
5417 optab optab;
5418 int icode;
5419 tree vectype;
5421 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5422 if (!vectype)
5423 return false;
5425 optab = optab_for_tree_code (code, vectype, optab_scalar);
5426 if (!optab
5427 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5429 optab = optab_for_tree_code (code, vectype, optab_vector);
5430 if (!optab
5431 || (optab_handler (optab, TYPE_MODE (vectype))
5432 == CODE_FOR_nothing))
5433 return false;
5436 vec_mode = TYPE_MODE (vectype);
5437 icode = (int) optab_handler (optab, vec_mode);
5438 if (icode == CODE_FOR_nothing)
5439 return false;
5441 return true;
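/* For example, on x86 with SSE2 the vector shift insns take a single
   scalar count that is applied to all elements (the optab_scalar case
   above), whereas per-element variable counts only became available with
   later extensions such as AVX2 (the optab_vector case).  This is merely
   an illustration of why both optabs are queried.  */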
5445 /* Function vectorizable_shift.
5447 Check if STMT_INFO performs a shift operation that can be vectorized.
5448 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5449 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5450 Return true if STMT_INFO is vectorizable in this way. */
5452 static bool
5453 vectorizable_shift (vec_info *vinfo,
5454 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5455 gimple **vec_stmt, slp_tree slp_node,
5456 stmt_vector_for_cost *cost_vec)
5458 tree vec_dest;
5459 tree scalar_dest;
5460 tree op0, op1 = NULL;
5461 tree vec_oprnd1 = NULL_TREE;
5462 tree vectype;
5463 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5464 enum tree_code code;
5465 machine_mode vec_mode;
5466 tree new_temp;
5467 optab optab;
5468 int icode;
5469 machine_mode optab_op2_mode;
5470 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5471 int ndts = 2;
5472 poly_uint64 nunits_in;
5473 poly_uint64 nunits_out;
5474 tree vectype_out;
5475 tree op1_vectype;
5476 int ncopies;
5477 int i;
5478 vec<tree> vec_oprnds0 = vNULL;
5479 vec<tree> vec_oprnds1 = vNULL;
5480 tree vop0, vop1;
5481 unsigned int k;
5482 bool scalar_shift_arg = true;
5483 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5484 bool incompatible_op1_vectype_p = false;
5486 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5487 return false;
5489 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5490 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5491 && ! vec_stmt)
5492 return false;
5494	  /* Is STMT a vectorizable shift/rotate operation?  */
5495 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5496 if (!stmt)
5497 return false;
5499 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5500 return false;
5502 code = gimple_assign_rhs_code (stmt);
5504 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5505 || code == RROTATE_EXPR))
5506 return false;
5508 scalar_dest = gimple_assign_lhs (stmt);
5509 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5510 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5512 if (dump_enabled_p ())
5513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5514 "bit-precision shifts not supported.\n");
5515 return false;
5518 slp_tree slp_op0;
5519 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5520 0, &op0, &slp_op0, &dt[0], &vectype))
5522 if (dump_enabled_p ())
5523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5524 "use not simple.\n");
5525 return false;
5527 /* If op0 is an external or constant def, infer the vector type
5528 from the scalar type. */
5529 if (!vectype)
5530 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5531 if (vec_stmt)
5532 gcc_assert (vectype);
5533 if (!vectype)
5535 if (dump_enabled_p ())
5536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5537 "no vectype for scalar type\n");
5538 return false;
5541 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5542 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5543 if (maybe_ne (nunits_out, nunits_in))
5544 return false;
5546 stmt_vec_info op1_def_stmt_info;
5547 slp_tree slp_op1;
5548 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5549 &dt[1], &op1_vectype, &op1_def_stmt_info))
5551 if (dump_enabled_p ())
5552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5553 "use not simple.\n");
5554 return false;
5557 /* Multiple types in SLP are handled by creating the appropriate number of
5558 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5559 case of SLP. */
5560 if (slp_node)
5561 ncopies = 1;
5562 else
5563 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5565 gcc_assert (ncopies >= 1);
5567	  /* Determine whether the shift amount is a vector or a scalar.  If the
5568	     shift/rotate amount is a vector, use the vector/vector shift optabs.  */
5570 if ((dt[1] == vect_internal_def
5571 || dt[1] == vect_induction_def
5572 || dt[1] == vect_nested_cycle)
5573 && !slp_node)
5574 scalar_shift_arg = false;
5575 else if (dt[1] == vect_constant_def
5576 || dt[1] == vect_external_def
5577 || dt[1] == vect_internal_def)
5579	      /* In SLP, we need to check whether the shift count is the same for
5580		 all the scalar stmts; in loops, if it is a constant or invariant,
5581		 it is always a scalar shift.  */
5582 if (slp_node)
5584 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5585 stmt_vec_info slpstmt_info;
5587 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5589 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5590 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5591 scalar_shift_arg = false;
5594 /* For internal SLP defs we have to make sure we see scalar stmts
5595 for all vector elements.
5596 ??? For different vectors we could resort to a different
5597 scalar shift operand but code-generation below simply always
5598 takes the first. */
5599 if (dt[1] == vect_internal_def
5600 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5601 stmts.length ()))
5602 scalar_shift_arg = false;
5605	      /* If the shift amount is computed by a pattern stmt we cannot
5606		 use the scalar amount directly, so give up and use a vector
5607		 shift.  */
5608 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5609 scalar_shift_arg = false;
5611 else
5613 if (dump_enabled_p ())
5614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5615 "operand mode requires invariant argument.\n");
5616 return false;
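  /* As an illustration (made-up loops):

       for (i = 0; i < n; i++)
	 a[i] = b[i] << 3;	   (constant or invariant count)

       for (i = 0; i < n; i++)
	 a[i] = b[i] << c[i];	   (count varies per iteration)

     In the first loop dt[1] is vect_constant_def/vect_external_def, so
     scalar_shift_arg stays true and the vector/scalar optab is tried
     first; in the second it is vect_internal_def and a vector/vector
     shift must be used.  */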
5619 /* Vector shifted by vector. */
5620 bool was_scalar_shift_arg = scalar_shift_arg;
5621 if (!scalar_shift_arg)
5623 optab = optab_for_tree_code (code, vectype, optab_vector);
5624 if (dump_enabled_p ())
5625 dump_printf_loc (MSG_NOTE, vect_location,
5626 "vector/vector shift/rotate found.\n");
5628 if (!op1_vectype)
5629 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5630 slp_op1);
5631 incompatible_op1_vectype_p
5632 = (op1_vectype == NULL_TREE
5633 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5634 TYPE_VECTOR_SUBPARTS (vectype))
5635 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5636 if (incompatible_op1_vectype_p
5637 && (!slp_node
5638 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5639 || slp_op1->refcnt != 1))
5641 if (dump_enabled_p ())
5642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5643 "unusable type for last operand in"
5644 " vector/vector shift/rotate.\n");
5645 return false;
5648	  /* See if the machine has a vector-shift-by-scalar insn, and if not,
5649	     see whether it has a vector-shift-by-vector insn.  */
5650 else
5652 optab = optab_for_tree_code (code, vectype, optab_scalar);
5653 if (optab
5654 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5656 if (dump_enabled_p ())
5657 dump_printf_loc (MSG_NOTE, vect_location,
5658 "vector/scalar shift/rotate found.\n");
5660 else
5662 optab = optab_for_tree_code (code, vectype, optab_vector);
5663 if (optab
5664 && (optab_handler (optab, TYPE_MODE (vectype))
5665 != CODE_FOR_nothing))
5667 scalar_shift_arg = false;
5669 if (dump_enabled_p ())
5670 dump_printf_loc (MSG_NOTE, vect_location,
5671 "vector/vector shift/rotate found.\n");
5673 if (!op1_vectype)
5674 op1_vectype = get_vectype_for_scalar_type (vinfo,
5675 TREE_TYPE (op1),
5676 slp_op1);
5678 /* Unlike the other binary operators, shifts/rotates have
5679 the rhs being int, instead of the same type as the lhs,
5680 so make sure the scalar is the right type if we are
5681 dealing with vectors of long long/long/short/char. */
5682 incompatible_op1_vectype_p
5683 = (!op1_vectype
5684 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5685 TREE_TYPE (op1)));
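	  /* For example (made-up types), shifting a vector(2) long long
	     operand by an int count is such a case: int -> long long is not
	     a nop conversion, so incompatible_op1_vectype_p is set and,
	     unless the count is an internal def, the transform code below
	     converts the count to the vector element type first.  */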
5686 if (incompatible_op1_vectype_p
5687 && dt[1] == vect_internal_def)
5689 if (dump_enabled_p ())
5690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5691 "unusable type for last operand in"
5692 " vector/vector shift/rotate.\n");
5693 return false;
5699 /* Supportable by target? */
5700 if (!optab)
5702 if (dump_enabled_p ())
5703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5704 "no optab.\n");
5705 return false;
5707 vec_mode = TYPE_MODE (vectype);
5708 icode = (int) optab_handler (optab, vec_mode);
5709 if (icode == CODE_FOR_nothing)
5711 if (dump_enabled_p ())
5712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5713 "op not supported by target.\n");
5714 return false;
5716 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5717 if (vect_emulated_vector_p (vectype))
5718 return false;
5720 if (!vec_stmt) /* transformation not required. */
5722 if (slp_node
5723 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5724 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5725 && (!incompatible_op1_vectype_p
5726 || dt[1] == vect_constant_def)
5727 && !vect_maybe_update_slp_op_vectype
5728 (slp_op1,
5729 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5731 if (dump_enabled_p ())
5732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5733 "incompatible vector types for invariants\n");
5734 return false;
5736 /* Now adjust the constant shift amount in place. */
5737 if (slp_node
5738 && incompatible_op1_vectype_p
5739 && dt[1] == vect_constant_def)
5741 for (unsigned i = 0;
5742 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5744 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5745 = fold_convert (TREE_TYPE (vectype),
5746 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5747 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5748 == INTEGER_CST));
5751 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5752 DUMP_VECT_SCOPE ("vectorizable_shift");
5753 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5754 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5755 return true;
5758 /* Transform. */
5760 if (dump_enabled_p ())
5761 dump_printf_loc (MSG_NOTE, vect_location,
5762 "transform binary/unary operation.\n");
5764 if (incompatible_op1_vectype_p && !slp_node)
5766 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5767 op1 = fold_convert (TREE_TYPE (vectype), op1);
5768 if (dt[1] != vect_constant_def)
5769 op1 = vect_init_vector (vinfo, stmt_info, op1,
5770 TREE_TYPE (vectype), NULL);
5773 /* Handle def. */
5774 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5776 if (scalar_shift_arg && dt[1] != vect_internal_def)
5778 /* Vector shl and shr insn patterns can be defined with scalar
5779 operand 2 (shift operand). In this case, use constant or loop
5780 invariant op1 directly, without extending it to vector mode
5781 first. */
5782 optab_op2_mode = insn_data[icode].operand[2].mode;
5783 if (!VECTOR_MODE_P (optab_op2_mode))
5785 if (dump_enabled_p ())
5786 dump_printf_loc (MSG_NOTE, vect_location,
5787 "operand 1 using scalar mode.\n");
5788 vec_oprnd1 = op1;
5789 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5790 vec_oprnds1.quick_push (vec_oprnd1);
5791 /* Store vec_oprnd1 for every vector stmt to be created.
5792 We check during the analysis that all the shift arguments
5793 are the same.
5794 TODO: Allow different constants for different vector
5795 stmts generated for an SLP instance. */
5796 for (k = 0;
5797 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5798 vec_oprnds1.quick_push (vec_oprnd1);
5801 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5803 if (was_scalar_shift_arg)
5805 /* If the argument was the same in all lanes create
5806 the correctly typed vector shift amount directly. */
5807 op1 = fold_convert (TREE_TYPE (vectype), op1);
5808 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5809 !loop_vinfo ? gsi : NULL);
5810 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5811 !loop_vinfo ? gsi : NULL);
5812 vec_oprnds1.create (slp_node->vec_stmts_size);
5813 for (k = 0; k < slp_node->vec_stmts_size; k++)
5814 vec_oprnds1.quick_push (vec_oprnd1);
5816 else if (dt[1] == vect_constant_def)
5817 /* The constant shift amount has been adjusted in place. */
5819 else
5820 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5823 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5824	     (a special case for certain kinds of vector shifts); otherwise,
5825 operand 1 should be of a vector type (the usual case). */
5826 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5827 op0, &vec_oprnds0,
5828 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5830 /* Arguments are ready. Create the new vector stmt. */
5831 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5833	      /* For internal defs where we need to use a scalar shift arg,
5834		 extract the first lane.  */
5835 if (scalar_shift_arg && dt[1] == vect_internal_def)
5837 vop1 = vec_oprnds1[0];
5838 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5839 gassign *new_stmt
5840 = gimple_build_assign (new_temp,
5841 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5842 vop1,
5843 TYPE_SIZE (TREE_TYPE (new_temp)),
5844 bitsize_zero_node));
5845 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5846 vop1 = new_temp;
5848 else
5849 vop1 = vec_oprnds1[i];
5850 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5851 new_temp = make_ssa_name (vec_dest, new_stmt);
5852 gimple_assign_set_lhs (new_stmt, new_temp);
5853 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5854 if (slp_node)
5855 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5856 else
5857 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5860 if (!slp_node)
5861 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5863 vec_oprnds0.release ();
5864 vec_oprnds1.release ();
5866 return true;
5870 /* Function vectorizable_operation.
5872 Check if STMT_INFO performs a binary, unary or ternary operation that can
5873 be vectorized.
5874 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5875 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5876 Return true if STMT_INFO is vectorizable in this way. */
5878 static bool
5879 vectorizable_operation (vec_info *vinfo,
5880 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5881 gimple **vec_stmt, slp_tree slp_node,
5882 stmt_vector_for_cost *cost_vec)
5884 tree vec_dest;
5885 tree scalar_dest;
5886 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5887 tree vectype;
5888 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5889 enum tree_code code, orig_code;
5890 machine_mode vec_mode;
5891 tree new_temp;
5892 int op_type;
5893 optab optab;
5894 bool target_support_p;
5895 enum vect_def_type dt[3]
5896 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5897 int ndts = 3;
5898 poly_uint64 nunits_in;
5899 poly_uint64 nunits_out;
5900 tree vectype_out;
5901 int ncopies, vec_num;
5902 int i;
5903 vec<tree> vec_oprnds0 = vNULL;
5904 vec<tree> vec_oprnds1 = vNULL;
5905 vec<tree> vec_oprnds2 = vNULL;
5906 tree vop0, vop1, vop2;
5907 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5909 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5910 return false;
5912 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5913 && ! vec_stmt)
5914 return false;
5916	  /* Is STMT a vectorizable unary, binary or ternary operation?  */
5917 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5918 if (!stmt)
5919 return false;
5921 /* Loads and stores are handled in vectorizable_{load,store}. */
5922 if (STMT_VINFO_DATA_REF (stmt_info))
5923 return false;
5925 orig_code = code = gimple_assign_rhs_code (stmt);
5927 /* Shifts are handled in vectorizable_shift. */
5928 if (code == LSHIFT_EXPR
5929 || code == RSHIFT_EXPR
5930 || code == LROTATE_EXPR
5931 || code == RROTATE_EXPR)
5932 return false;
5934 /* Comparisons are handled in vectorizable_comparison. */
5935 if (TREE_CODE_CLASS (code) == tcc_comparison)
5936 return false;
5938 /* Conditions are handled in vectorizable_condition. */
5939 if (code == COND_EXPR)
5940 return false;
5942 /* For pointer addition and subtraction, we should use the normal
5943 plus and minus for the vector operation. */
5944 if (code == POINTER_PLUS_EXPR)
5945 code = PLUS_EXPR;
5946 if (code == POINTER_DIFF_EXPR)
5947 code = MINUS_EXPR;
5949	  /* Support only unary, binary and ternary operations.  */
5950 op_type = TREE_CODE_LENGTH (code);
5951 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5953 if (dump_enabled_p ())
5954 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5955 "num. args = %d (not unary/binary/ternary op).\n",
5956 op_type);
5957 return false;
5960 scalar_dest = gimple_assign_lhs (stmt);
5961 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5963 /* Most operations cannot handle bit-precision types without extra
5964 truncations. */
5965 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5966 if (!mask_op_p
5967 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5968 /* Exception are bitwise binary operations. */
5969 && code != BIT_IOR_EXPR
5970 && code != BIT_XOR_EXPR
5971 && code != BIT_AND_EXPR)
5973 if (dump_enabled_p ())
5974 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5975 "bit-precision arithmetic not supported.\n");
5976 return false;
5979 slp_tree slp_op0;
5980 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5981 0, &op0, &slp_op0, &dt[0], &vectype))
5983 if (dump_enabled_p ())
5984 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5985 "use not simple.\n");
5986 return false;
5988 /* If op0 is an external or constant def, infer the vector type
5989 from the scalar type. */
5990 if (!vectype)
5992 /* For boolean type we cannot determine vectype by
5993 invariant value (don't know whether it is a vector
5994 of booleans or vector of integers). We use output
5995 vectype because operations on boolean don't change
5996 type. */
5997 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5999 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6001 if (dump_enabled_p ())
6002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6003 "not supported operation on bool value.\n");
6004 return false;
6006 vectype = vectype_out;
6008 else
6009 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6010 slp_node);
6012 if (vec_stmt)
6013 gcc_assert (vectype);
6014 if (!vectype)
6016 if (dump_enabled_p ())
6017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6018 "no vectype for scalar type %T\n",
6019 TREE_TYPE (op0));
6021 return false;
6024 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6025 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6026 if (maybe_ne (nunits_out, nunits_in))
6027 return false;
6029 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6030 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6031 if (op_type == binary_op || op_type == ternary_op)
6033 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6034 1, &op1, &slp_op1, &dt[1], &vectype2))
6036 if (dump_enabled_p ())
6037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6038 "use not simple.\n");
6039 return false;
6042 if (op_type == ternary_op)
6044 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6045 2, &op2, &slp_op2, &dt[2], &vectype3))
6047 if (dump_enabled_p ())
6048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6049 "use not simple.\n");
6050 return false;
6054 /* Multiple types in SLP are handled by creating the appropriate number of
6055 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6056 case of SLP. */
6057 if (slp_node)
6059 ncopies = 1;
6060 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6062 else
6064 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6065 vec_num = 1;
6068 gcc_assert (ncopies >= 1);
6070 /* Reject attempts to combine mask types with nonmask types, e.g. if
6071 we have an AND between a (nonmask) boolean loaded from memory and
6072 a (mask) boolean result of a comparison.
6074 TODO: We could easily fix these cases up using pattern statements. */
6075 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6076 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6077 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6079 if (dump_enabled_p ())
6080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6081 "mixed mask and nonmask vector types\n");
6082 return false;
6085 /* Supportable by target? */
6087 vec_mode = TYPE_MODE (vectype);
6088 if (code == MULT_HIGHPART_EXPR)
6089 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6090 else
6092 optab = optab_for_tree_code (code, vectype, optab_default);
6093 if (!optab)
6095 if (dump_enabled_p ())
6096 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6097 "no optab.\n");
6098 return false;
6100 target_support_p = (optab_handler (optab, vec_mode)
6101 != CODE_FOR_nothing);
6104 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6105 if (!target_support_p)
6107 if (dump_enabled_p ())
6108 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6109 "op not supported by target.\n");
6110 /* Check only during analysis. */
6111 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6112 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6113 return false;
6114 if (dump_enabled_p ())
6115 dump_printf_loc (MSG_NOTE, vect_location,
6116 "proceeding using word mode.\n");
6117 using_emulated_vectors_p = true;
6120 if (using_emulated_vectors_p
6121 && !vect_can_vectorize_without_simd_p (code))
6123 if (dump_enabled_p ())
6124 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6125 return false;
6128 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6129 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6130 internal_fn cond_fn = get_conditional_internal_fn (code);
6132 if (!vec_stmt) /* transformation not required. */
6134 /* If this operation is part of a reduction, a fully-masked loop
6135 should only change the active lanes of the reduction chain,
6136 keeping the inactive lanes as-is. */
6137 if (loop_vinfo
6138 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6139 && reduc_idx >= 0)
6141 if (cond_fn == IFN_LAST
6142 || !direct_internal_fn_supported_p (cond_fn, vectype,
6143 OPTIMIZE_FOR_SPEED))
6145 if (dump_enabled_p ())
6146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6147 "can't use a fully-masked loop because no"
6148 " conditional operation is available.\n");
6149 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6151 else
6152 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6153 vectype, NULL);
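	  /* For example, for a PLUS_EXPR reduction COND_FN is IFN_COND_ADD
	     and the transform phase below emits something like

	       _res = .COND_ADD (loop_mask, vop0, vop1, reduction_input);

	     (with _res a made-up name) so that the masked-off lanes simply
	     pass the reduction input through unchanged.  */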
6156 /* Put types on constant and invariant SLP children. */
6157 if (slp_node
6158 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6159 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6160 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6162 if (dump_enabled_p ())
6163 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6164 "incompatible vector types for invariants\n");
6165 return false;
6168 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6169 DUMP_VECT_SCOPE ("vectorizable_operation");
6170 vect_model_simple_cost (vinfo, stmt_info,
6171 ncopies, dt, ndts, slp_node, cost_vec);
6172 if (using_emulated_vectors_p)
6174 /* The above vect_model_simple_cost call handles constants
6175		 in the prologue and (mis-)costs one of the stmts as a
6176		 vector stmt.  See tree-vect-generic.c:do_plus_minus/do_negate
6177 for the actual lowering that will be applied. */
6178 unsigned n
6179 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6180 switch (code)
6182 case PLUS_EXPR:
6183 n *= 5;
6184 break;
6185 case MINUS_EXPR:
6186 n *= 6;
6187 break;
6188 case NEGATE_EXPR:
6189 n *= 4;
6190 break;
6191 default:;
6193 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
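	  /* Rough sketch of where these factors come from: emulating
	     e.g. a V2SI addition in one scalar word masks off the top bit
	     of each element in both operands, adds the masked words and
	     then patches the element sign bits back with XORs - on the
	     order of five scalar stmts per vector add, six per subtract
	     and four per negate; the exact sequences are those built by
	     tree-vect-generic.c.  */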
6195 return true;
6198 /* Transform. */
6200 if (dump_enabled_p ())
6201 dump_printf_loc (MSG_NOTE, vect_location,
6202 "transform binary/unary operation.\n");
6204 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6206 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6207 vectors with unsigned elements, but the result is signed. So, we
6208	     need to compute the MINUS_EXPR into a vectype temporary and
6209 VIEW_CONVERT_EXPR it into the final vectype_out result. */
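  /* For instance (illustrative source), with pointer arrays p[] and q[],

       ptrdiff_t d = p[i] - q[i];

     is such a POINTER_DIFF_EXPR: the subtraction is carried out in the
     unsigned-element vectype and the result is then converted with a
     VIEW_CONVERT_EXPR to the signed vectype_out.  */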
6210 tree vec_cvt_dest = NULL_TREE;
6211 if (orig_code == POINTER_DIFF_EXPR)
6213 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6214 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6216 /* Handle def. */
6217 else
6218 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6220 /* In case the vectorization factor (VF) is bigger than the number
6221 of elements that we can fit in a vectype (nunits), we have to generate
6222 more than one vector stmt - i.e - we need to "unroll" the
6223 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6224 from one copy of the vector stmt to the next, in the field
6225 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6226 stages to find the correct vector defs to be used when vectorizing
6227 stmts that use the defs of the current stmt. The example below
6228 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6229 we need to create 4 vectorized stmts):
6231 before vectorization:
6232 RELATED_STMT VEC_STMT
6233 S1: x = memref - -
6234 S2: z = x + 1 - -
6236 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6237 there):
6238 RELATED_STMT VEC_STMT
6239 VS1_0: vx0 = memref0 VS1_1 -
6240 VS1_1: vx1 = memref1 VS1_2 -
6241 VS1_2: vx2 = memref2 VS1_3 -
6242 VS1_3: vx3 = memref3 - -
6243 S1: x = load - VS1_0
6244 S2: z = x + 1 - -
6246 step2: vectorize stmt S2 (done here):
6247 To vectorize stmt S2 we first need to find the relevant vector
6248 def for the first operand 'x'. This is, as usual, obtained from
6249 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6250 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6251 relevant vector def 'vx0'. Having found 'vx0' we can generate
6252 the vector stmt VS2_0, and as usual, record it in the
6253 STMT_VINFO_VEC_STMT of stmt S2.
6254 When creating the second copy (VS2_1), we obtain the relevant vector
6255 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6256 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6257 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6258 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6259 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6260 chain of stmts and pointers:
6261 RELATED_STMT VEC_STMT
6262 VS1_0: vx0 = memref0 VS1_1 -
6263 VS1_1: vx1 = memref1 VS1_2 -
6264 VS1_2: vx2 = memref2 VS1_3 -
6265 VS1_3: vx3 = memref3 - -
6266 S1: x = load - VS1_0
6267 VS2_0: vz0 = vx0 + v1 VS2_1 -
6268 VS2_1: vz1 = vx1 + v1 VS2_2 -
6269 VS2_2: vz2 = vx2 + v1 VS2_3 -
6270 VS2_3: vz3 = vx3 + v1 - -
6271 S2: z = x + 1 - VS2_0 */
6273 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6274 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6275 /* Arguments are ready. Create the new vector stmt. */
6276 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6278 gimple *new_stmt = NULL;
6279 vop1 = ((op_type == binary_op || op_type == ternary_op)
6280 ? vec_oprnds1[i] : NULL_TREE);
6281 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6282 if (masked_loop_p && reduc_idx >= 0)
6284 /* Perform the operation on active elements only and take
6285 inactive elements from the reduction chain input. */
6286 gcc_assert (!vop2);
6287 vop2 = reduc_idx == 1 ? vop1 : vop0;
6288 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6289 vectype, i);
6290 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6291 vop0, vop1, vop2);
6292 new_temp = make_ssa_name (vec_dest, call);
6293 gimple_call_set_lhs (call, new_temp);
6294 gimple_call_set_nothrow (call, true);
6295 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6296 new_stmt = call;
6298 else
6300 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6301 new_temp = make_ssa_name (vec_dest, new_stmt);
6302 gimple_assign_set_lhs (new_stmt, new_temp);
6303 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6304 if (vec_cvt_dest)
6306 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6307 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6308 new_temp);
6309 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6310 gimple_assign_set_lhs (new_stmt, new_temp);
6311 vect_finish_stmt_generation (vinfo, stmt_info,
6312 new_stmt, gsi);
6315 if (slp_node)
6316 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6317 else
6318 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6321 if (!slp_node)
6322 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6324 vec_oprnds0.release ();
6325 vec_oprnds1.release ();
6326 vec_oprnds2.release ();
6328 return true;
6331 /* A helper function to ensure data reference DR_INFO's base alignment. */
6333 static void
6334 ensure_base_align (dr_vec_info *dr_info)
6336 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6337 return;
6339 if (dr_info->base_misaligned)
6341 tree base_decl = dr_info->base_decl;
6343 // We should only be able to increase the alignment of a base object if
6344 // we know what its new alignment should be at compile time.
6345 unsigned HOST_WIDE_INT align_base_to =
6346 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6348 if (decl_in_symtab_p (base_decl))
6349 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6350 else if (DECL_ALIGN (base_decl) < align_base_to)
6352 SET_DECL_ALIGN (base_decl, align_base_to);
6353 DECL_USER_ALIGN (base_decl) = 1;
6355 dr_info->base_misaligned = false;
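/* For example (no particular target implied), a static int array used as
   the base of a vectorized access whose DECL_ALIGN is only 4 bytes would
   have its alignment raised above to the target's preferred vector
   alignment, either via the symbol table or directly via SET_DECL_ALIGN,
   so that aligned vector accesses can be generated.  */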
6360 /* Function get_group_alias_ptr_type.
6362 Return the alias type for the group starting at FIRST_STMT_INFO. */
6364 static tree
6365 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6367 struct data_reference *first_dr, *next_dr;
6369 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6370 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6371 while (next_stmt_info)
6373 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6374 if (get_alias_set (DR_REF (first_dr))
6375 != get_alias_set (DR_REF (next_dr)))
6377 if (dump_enabled_p ())
6378 dump_printf_loc (MSG_NOTE, vect_location,
6379 "conflicting alias set types.\n");
6380 return ptr_type_node;
6382 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6384 return reference_alias_ptr_type (DR_REF (first_dr));
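/* For example (illustrative case), if the members of an interleaved group
   access memory through an int lvalue and through a float lvalue, their
   alias sets differ and the group conservatively falls back to
   ptr_type_node, whose alias set may alias anything.  */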
6388 /* Function scan_operand_equal_p.
6390 Helper function for check_scan_store. Compare two references
6391 with .GOMP_SIMD_LANE bases. */
6393 static bool
6394 scan_operand_equal_p (tree ref1, tree ref2)
6396 tree ref[2] = { ref1, ref2 };
6397 poly_int64 bitsize[2], bitpos[2];
6398 tree offset[2], base[2];
6399 for (int i = 0; i < 2; ++i)
6401 machine_mode mode;
6402 int unsignedp, reversep, volatilep = 0;
6403 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6404 &offset[i], &mode, &unsignedp,
6405 &reversep, &volatilep);
6406 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6407 return false;
6408 if (TREE_CODE (base[i]) == MEM_REF
6409 && offset[i] == NULL_TREE
6410 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6412 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6413 if (is_gimple_assign (def_stmt)
6414 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6415 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6416 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6418 if (maybe_ne (mem_ref_offset (base[i]), 0))
6419 return false;
6420 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6421 offset[i] = gimple_assign_rhs2 (def_stmt);
6426 if (!operand_equal_p (base[0], base[1], 0))
6427 return false;
6428 if (maybe_ne (bitsize[0], bitsize[1]))
6429 return false;
6430 if (offset[0] != offset[1])
6432 if (!offset[0] || !offset[1])
6433 return false;
6434 if (!operand_equal_p (offset[0], offset[1], 0))
6436 tree step[2];
6437 for (int i = 0; i < 2; ++i)
6439 step[i] = integer_one_node;
6440 if (TREE_CODE (offset[i]) == SSA_NAME)
6442 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6443 if (is_gimple_assign (def_stmt)
6444 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6445 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6446 == INTEGER_CST))
6448 step[i] = gimple_assign_rhs2 (def_stmt);
6449 offset[i] = gimple_assign_rhs1 (def_stmt);
6452 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6454 step[i] = TREE_OPERAND (offset[i], 1);
6455 offset[i] = TREE_OPERAND (offset[i], 0);
6457 tree rhs1 = NULL_TREE;
6458 if (TREE_CODE (offset[i]) == SSA_NAME)
6460 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6461 if (gimple_assign_cast_p (def_stmt))
6462 rhs1 = gimple_assign_rhs1 (def_stmt);
6464 else if (CONVERT_EXPR_P (offset[i]))
6465 rhs1 = TREE_OPERAND (offset[i], 0);
6466 if (rhs1
6467 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6468 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6469 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6470 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6471 offset[i] = rhs1;
6473 if (!operand_equal_p (offset[0], offset[1], 0)
6474 || !operand_equal_p (step[0], step[1], 0))
6475 return false;
6478 return true;
6482 enum scan_store_kind {
6483 /* Normal permutation. */
6484 scan_store_kind_perm,
6486 /* Whole vector left shift permutation with zero init. */
6487 scan_store_kind_lshift_zero,
6489 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6490 scan_store_kind_lshift_cond
6493	/* Function scan_store_can_perm_p.
6495	   Verify if we can perform the needed permutations or whole vector shifts.
6496	   Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6497	   USE_WHOLE_VECTOR, if nonnull, records for each step which
6498	   scan_store_kind operation to use (it is left empty if plain
	   permutations suffice throughout).  */
6500 static int
6501 scan_store_can_perm_p (tree vectype, tree init,
6502 vec<enum scan_store_kind> *use_whole_vector = NULL)
6504 enum machine_mode vec_mode = TYPE_MODE (vectype);
6505 unsigned HOST_WIDE_INT nunits;
6506 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6507 return -1;
6508 int units_log2 = exact_log2 (nunits);
6509 if (units_log2 <= 0)
6510 return -1;
6512 int i;
6513 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6514 for (i = 0; i <= units_log2; ++i)
6516 unsigned HOST_WIDE_INT j, k;
6517 enum scan_store_kind kind = scan_store_kind_perm;
6518 vec_perm_builder sel (nunits, nunits, 1);
6519 sel.quick_grow (nunits);
6520 if (i == units_log2)
6522 for (j = 0; j < nunits; ++j)
6523 sel[j] = nunits - 1;
6525 else
6527 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6528 sel[j] = j;
6529 for (k = 0; j < nunits; ++j, ++k)
6530 sel[j] = nunits + k;
6532 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6533 if (!can_vec_perm_const_p (vec_mode, indices))
6535 if (i == units_log2)
6536 return -1;
6538 if (whole_vector_shift_kind == scan_store_kind_perm)
6540 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6541 return -1;
6542 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6543	    /* Whole vector shifts shift in zeros, so if INIT is an all-zeros
6544	       constant, there is no need to do anything further.  */
6545 if ((TREE_CODE (init) != INTEGER_CST
6546 && TREE_CODE (init) != REAL_CST)
6547 || !initializer_zerop (init))
6549 tree masktype = truth_type_for (vectype);
6550 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6551 return -1;
6552 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6555 kind = whole_vector_shift_kind;
6557 if (use_whole_vector)
6559 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6560 use_whole_vector->safe_grow_cleared (i, true);
6561 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6562 use_whole_vector->safe_push (kind);
6566 return units_log2;
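/* For example, with nunits == 8 the loop above checks the selectors
     i == 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }
     i == 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }
     i == 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }
     i == 3:  { 7, 7, 7, 7, 7, 7, 7, 7 }
   which match the VEC_PERM_EXPRs in the scan examples further below; when
   one of them is not supported, a whole-vector shift (possibly combined
   with a VEC_COND_EXPR) is used for that step instead.  */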
6570 /* Function check_scan_store.
6572 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6574 static bool
6575 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6576 enum vect_def_type rhs_dt, bool slp, tree mask,
6577 vect_memory_access_type memory_access_type)
6579 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6580 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6581 tree ref_type;
6583 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6584 if (slp
6585 || mask
6586 || memory_access_type != VMAT_CONTIGUOUS
6587 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6588 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6589 || loop_vinfo == NULL
6590 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6591 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6592 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6593 || !integer_zerop (DR_INIT (dr_info->dr))
6594 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6595 || !alias_sets_conflict_p (get_alias_set (vectype),
6596 get_alias_set (TREE_TYPE (ref_type))))
6598 if (dump_enabled_p ())
6599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6600 "unsupported OpenMP scan store.\n");
6601 return false;
6604 /* We need to pattern match code built by OpenMP lowering and simplified
6605	   by subsequent optimizations into something we can handle.
6606 #pragma omp simd reduction(inscan,+:r)
6607 for (...)
6609 r += something ();
6610 #pragma omp scan inclusive (r)
6611 use (r);
6613 shall have body with:
6614 // Initialization for input phase, store the reduction initializer:
6615 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6616 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6617 D.2042[_21] = 0;
6618 // Actual input phase:
6620 r.0_5 = D.2042[_20];
6621 _6 = _4 + r.0_5;
6622 D.2042[_20] = _6;
6623 // Initialization for scan phase:
6624 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6625 _26 = D.2043[_25];
6626 _27 = D.2042[_25];
6627 _28 = _26 + _27;
6628 D.2043[_25] = _28;
6629 D.2042[_25] = _28;
6630 // Actual scan phase:
6632 r.1_8 = D.2042[_20];
6634 The "omp simd array" variable D.2042 holds the privatized copy used
6635 inside of the loop and D.2043 is another one that holds copies of
6636 the current original list item. The separate GOMP_SIMD_LANE ifn
6637 kinds are there in order to allow optimizing the initializer store
6638 and combiner sequence, e.g. if it is originally some C++ish user
6639 defined reduction, but allow the vectorizer to pattern recognize it
6640 and turn into the appropriate vectorized scan.
6642 For exclusive scan, this is slightly different:
6643 #pragma omp simd reduction(inscan,+:r)
6644 for (...)
6646 use (r);
6647 #pragma omp scan exclusive (r)
6648 r += something ();
6650 shall have body with:
6651 // Initialization for input phase, store the reduction initializer:
6652 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6653 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6654 D.2042[_21] = 0;
6655 // Actual input phase:
6657 r.0_5 = D.2042[_20];
6658 _6 = _4 + r.0_5;
6659 D.2042[_20] = _6;
6660 // Initialization for scan phase:
6661 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6662 _26 = D.2043[_25];
6663 D.2044[_25] = _26;
6664 _27 = D.2042[_25];
6665 _28 = _26 + _27;
6666 D.2043[_25] = _28;
6667 // Actual scan phase:
6669 r.1_8 = D.2044[_20];
6670 ... */
6672 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6674 /* Match the D.2042[_21] = 0; store above. Just require that
6675 it is a constant or external definition store. */
6676 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6678 fail_init:
6679 if (dump_enabled_p ())
6680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6681 "unsupported OpenMP scan initializer store.\n");
6682 return false;
6685 if (! loop_vinfo->scan_map)
6686 loop_vinfo->scan_map = new hash_map<tree, tree>;
6687 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6688 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6689 if (cached)
6690 goto fail_init;
6691 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6693 /* These stores can be vectorized normally. */
6694 return true;
6697 if (rhs_dt != vect_internal_def)
6699 fail:
6700 if (dump_enabled_p ())
6701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6702 "unsupported OpenMP scan combiner pattern.\n");
6703 return false;
6706 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6707 tree rhs = gimple_assign_rhs1 (stmt);
6708 if (TREE_CODE (rhs) != SSA_NAME)
6709 goto fail;
6711 gimple *other_store_stmt = NULL;
6712 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6713 bool inscan_var_store
6714 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6716 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6718 if (!inscan_var_store)
6720 use_operand_p use_p;
6721 imm_use_iterator iter;
6722 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6724 gimple *use_stmt = USE_STMT (use_p);
6725 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6726 continue;
6727 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6728 || !is_gimple_assign (use_stmt)
6729 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6730 || other_store_stmt
6731 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6732 goto fail;
6733 other_store_stmt = use_stmt;
6735 if (other_store_stmt == NULL)
6736 goto fail;
6737 rhs = gimple_assign_lhs (other_store_stmt);
6738 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6739 goto fail;
6742 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6744 use_operand_p use_p;
6745 imm_use_iterator iter;
6746 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6748 gimple *use_stmt = USE_STMT (use_p);
6749 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6750 continue;
6751 if (other_store_stmt)
6752 goto fail;
6753 other_store_stmt = use_stmt;
6756 else
6757 goto fail;
6759 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6760 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6761 || !is_gimple_assign (def_stmt)
6762 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6763 goto fail;
6765 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6766 /* For pointer addition, we should use the normal plus for the vector
6767 operation. */
6768 switch (code)
6770 case POINTER_PLUS_EXPR:
6771 code = PLUS_EXPR;
6772 break;
6773 case MULT_HIGHPART_EXPR:
6774 goto fail;
6775 default:
6776 break;
6778 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6779 goto fail;
6781 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6782 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6783 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6784 goto fail;
6786 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6787 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6788 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6789 || !gimple_assign_load_p (load1_stmt)
6790 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6791 || !gimple_assign_load_p (load2_stmt))
6792 goto fail;
6794 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6795 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6796 if (load1_stmt_info == NULL
6797 || load2_stmt_info == NULL
6798 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6799 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6800 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6801 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6802 goto fail;
6804 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6806 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6807 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6808 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6809 goto fail;
6810 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6811 tree lrhs;
6812 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6813 lrhs = rhs1;
6814 else
6815 lrhs = rhs2;
6816 use_operand_p use_p;
6817 imm_use_iterator iter;
6818 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6820 gimple *use_stmt = USE_STMT (use_p);
6821 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6822 continue;
6823 if (other_store_stmt)
6824 goto fail;
6825 other_store_stmt = use_stmt;
6829 if (other_store_stmt == NULL)
6830 goto fail;
6831 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6832 || !gimple_store_p (other_store_stmt))
6833 goto fail;
6835 stmt_vec_info other_store_stmt_info
6836 = loop_vinfo->lookup_stmt (other_store_stmt);
6837 if (other_store_stmt_info == NULL
6838 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6839 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6840 goto fail;
6842 gimple *stmt1 = stmt;
6843 gimple *stmt2 = other_store_stmt;
6844 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6845 std::swap (stmt1, stmt2);
6846 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6847 gimple_assign_rhs1 (load2_stmt)))
6849 std::swap (rhs1, rhs2);
6850 std::swap (load1_stmt, load2_stmt);
6851 std::swap (load1_stmt_info, load2_stmt_info);
6853 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6854 gimple_assign_rhs1 (load1_stmt)))
6855 goto fail;
6857 tree var3 = NULL_TREE;
6858 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6859 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6860 gimple_assign_rhs1 (load2_stmt)))
6861 goto fail;
6862 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6864 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6865 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6866 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6867 goto fail;
6868 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6869 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6870 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6871 || lookup_attribute ("omp simd inscan exclusive",
6872 DECL_ATTRIBUTES (var3)))
6873 goto fail;
6876 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6877 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6878 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6879 goto fail;
6881 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6882 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6883 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6884 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6885 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6886 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6887 goto fail;
6889 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6890 std::swap (var1, var2);
6892 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6894 if (!lookup_attribute ("omp simd inscan exclusive",
6895 DECL_ATTRIBUTES (var1)))
6896 goto fail;
6897 var1 = var3;
6900 if (loop_vinfo->scan_map == NULL)
6901 goto fail;
6902 tree *init = loop_vinfo->scan_map->get (var1);
6903 if (init == NULL)
6904 goto fail;
6906 /* The IL is as expected, now check if we can actually vectorize it.
6907 Inclusive scan:
6908 _26 = D.2043[_25];
6909 _27 = D.2042[_25];
6910 _28 = _26 + _27;
6911 D.2043[_25] = _28;
6912 D.2042[_25] = _28;
6913 should be vectorized as (where _40 is the vectorized rhs
6914 from the D.2042[_21] = 0; store):
6915 _30 = MEM <vector(8) int> [(int *)&D.2043];
6916 _31 = MEM <vector(8) int> [(int *)&D.2042];
6917 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6918 _33 = _31 + _32;
6919 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6920 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6921 _35 = _33 + _34;
6922 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6923 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6924 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6925 _37 = _35 + _36;
6926 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6927 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6928 _38 = _30 + _37;
6929 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6930 MEM <vector(8) int> [(int *)&D.2043] = _39;
6931 MEM <vector(8) int> [(int *)&D.2042] = _38;
6932 Exclusive scan:
6933 _26 = D.2043[_25];
6934 D.2044[_25] = _26;
6935 _27 = D.2042[_25];
6936 _28 = _26 + _27;
6937 D.2043[_25] = _28;
6938 should be vectorized as (where _40 is the vectorized rhs
6939 from the D.2042[_21] = 0; store):
6940 _30 = MEM <vector(8) int> [(int *)&D.2043];
6941 _31 = MEM <vector(8) int> [(int *)&D.2042];
6942 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6943 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6944 _34 = _32 + _33;
6945 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6946 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6947 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6948 _36 = _34 + _35;
6949 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6950 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6951 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6952 _38 = _36 + _37;
6953 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6954 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6955 _39 = _30 + _38;
6956 _50 = _31 + _39;
6957 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6958 MEM <vector(8) int> [(int *)&D.2044] = _39;
6959 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6960 enum machine_mode vec_mode = TYPE_MODE (vectype);
6961 optab optab = optab_for_tree_code (code, vectype, optab_default);
6962 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6963 goto fail;
6965 int units_log2 = scan_store_can_perm_p (vectype, *init);
6966 if (units_log2 == -1)
6967 goto fail;
6969 return true;
6973	/* Function vectorizable_scan_store.
6975	   Helper of vectorizable_store, with the same arguments as vectorizable_store.
6976	   Handles only the transformation; the checking is done in check_scan_store.  */
6978 static bool
6979 vectorizable_scan_store (vec_info *vinfo,
6980 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6981 gimple **vec_stmt, int ncopies)
6983 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6984 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6985 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6986 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6988 if (dump_enabled_p ())
6989 dump_printf_loc (MSG_NOTE, vect_location,
6990 "transform scan store. ncopies = %d\n", ncopies);
6992 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6993 tree rhs = gimple_assign_rhs1 (stmt);
6994 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6996 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6997 bool inscan_var_store
6998 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7000 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7002 use_operand_p use_p;
7003 imm_use_iterator iter;
7004 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7006 gimple *use_stmt = USE_STMT (use_p);
7007 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7008 continue;
7009 rhs = gimple_assign_lhs (use_stmt);
7010 break;
7014 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7015 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7016 if (code == POINTER_PLUS_EXPR)
7017 code = PLUS_EXPR;
7018 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7019 && commutative_tree_code (code));
7020 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7021 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7022 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7023 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7024 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7025 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7026 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7027 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7028 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7029 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7030 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7032 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7034 std::swap (rhs1, rhs2);
7035 std::swap (var1, var2);
7036 std::swap (load1_dr_info, load2_dr_info);
7039 tree *init = loop_vinfo->scan_map->get (var1);
7040 gcc_assert (init);
7042 unsigned HOST_WIDE_INT nunits;
7043 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7044 gcc_unreachable ();
7045 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7046 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7047 gcc_assert (units_log2 > 0);
7048 auto_vec<tree, 16> perms;
7049 perms.quick_grow (units_log2 + 1);
7050 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7051 for (int i = 0; i <= units_log2; ++i)
7053 unsigned HOST_WIDE_INT j, k;
7054 vec_perm_builder sel (nunits, nunits, 1);
7055 sel.quick_grow (nunits);
7056 if (i == units_log2)
7057 for (j = 0; j < nunits; ++j)
7058 sel[j] = nunits - 1;
7059 else
7061 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7062 sel[j] = j;
7063 for (k = 0; j < nunits; ++j, ++k)
7064 sel[j] = nunits + k;
7066 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7067 if (!use_whole_vector.is_empty ()
7068 && use_whole_vector[i] != scan_store_kind_perm)
7070 if (zero_vec == NULL_TREE)
7071 zero_vec = build_zero_cst (vectype);
7072 if (masktype == NULL_TREE
7073 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7074 masktype = truth_type_for (vectype);
7075 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7077 else
7078 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7081 tree vec_oprnd1 = NULL_TREE;
7082 tree vec_oprnd2 = NULL_TREE;
7083 tree vec_oprnd3 = NULL_TREE;
7084 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7085 tree dataref_offset = build_int_cst (ref_type, 0);
7086 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7087 vectype, VMAT_CONTIGUOUS);
7088 tree ldataref_ptr = NULL_TREE;
7089 tree orig = NULL_TREE;
7090 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7091 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7092 auto_vec<tree> vec_oprnds1;
7093 auto_vec<tree> vec_oprnds2;
7094 auto_vec<tree> vec_oprnds3;
7095 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7096 *init, &vec_oprnds1,
7097 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7098 rhs2, &vec_oprnds3);
7099 for (int j = 0; j < ncopies; j++)
7101 vec_oprnd1 = vec_oprnds1[j];
7102 if (ldataref_ptr == NULL)
7103 vec_oprnd2 = vec_oprnds2[j];
7104 vec_oprnd3 = vec_oprnds3[j];
7105 if (j == 0)
7106 orig = vec_oprnd3;
7107 else if (!inscan_var_store)
7108 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7110 if (ldataref_ptr)
7112 vec_oprnd2 = make_ssa_name (vectype);
7113 tree data_ref = fold_build2 (MEM_REF, vectype,
7114 unshare_expr (ldataref_ptr),
7115 dataref_offset);
7116 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7117 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7118 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7119 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7120 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7123 tree v = vec_oprnd2;
7124 for (int i = 0; i < units_log2; ++i)
7126 tree new_temp = make_ssa_name (vectype);
7127 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7128 (zero_vec
7129 && (use_whole_vector[i]
7130 != scan_store_kind_perm))
7131 ? zero_vec : vec_oprnd1, v,
7132 perms[i]);
7133 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7134 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7135 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7137 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7139 /* The whole-vector shift shifted in zero bits, but if *init
7140 is not initializer_zerop, we need to replace those elements
7141 with elements from vec_oprnd1. */
7142 tree_vector_builder vb (masktype, nunits, 1);
7143 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7144 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7145 ? boolean_false_node : boolean_true_node);
7147 tree new_temp2 = make_ssa_name (vectype);
7148 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7149 new_temp, vec_oprnd1);
7150 vect_finish_stmt_generation (vinfo, stmt_info,
7151 g, gsi);
7152 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7153 new_temp = new_temp2;
7156 /* For exclusive scan, perform the perms[i] permutation once
7157 more. */
7158 if (i == 0
7159 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7160 && v == vec_oprnd2)
7162 v = new_temp;
7163 --i;
7164 continue;
7167 tree new_temp2 = make_ssa_name (vectype);
7168 g = gimple_build_assign (new_temp2, code, v, new_temp);
7169 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7170 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7172 v = new_temp2;
7175 tree new_temp = make_ssa_name (vectype);
7176 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7177 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7178 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7180 tree last_perm_arg = new_temp;
7181 /* For exclusive scan, new_temp computed above is the exclusive scan
7182 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7183 of the last element into orig. */
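/* E.g. (illustrative), with per-lane inputs {a0, a1, a2, a3} and the
   carry-in broadcast as {c, c, c, c}: NEW_TEMP holds the exclusive sums
   {c, c+a0, c+a0+a1, c+a0+a1+a2}; adding the inputs back in below yields
   the inclusive sums, whose last lane c+a0+a1+a2+a3 is what gets
   broadcast into ORIG.  */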
7184 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7186 last_perm_arg = make_ssa_name (vectype);
7187 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7188 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7189 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7192 orig = make_ssa_name (vectype);
7193 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7194 last_perm_arg, perms[units_log2]);
7195 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7196 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7198 if (!inscan_var_store)
7200 tree data_ref = fold_build2 (MEM_REF, vectype,
7201 unshare_expr (dataref_ptr),
7202 dataref_offset);
7203 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7204 g = gimple_build_assign (data_ref, new_temp);
7205 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7206 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7210 if (inscan_var_store)
7211 for (int j = 0; j < ncopies; j++)
7213 if (j != 0)
7214 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7216 tree data_ref = fold_build2 (MEM_REF, vectype,
7217 unshare_expr (dataref_ptr),
7218 dataref_offset);
7219 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7220 gimple *g = gimple_build_assign (data_ref, orig);
7221 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7222 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7224 return true;
7228 /* Function vectorizable_store.
7230 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7231 that can be vectorized.
7232 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7233 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7234 Return true if STMT_INFO is vectorizable in this way. */
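/* Illustrative sketch only: a typical candidate is the store in

	for (i = 0; i < n; i++)
	  a[i] = b[i] + 1;

   which, when the checks below succeed, is replaced by stores of whole
   vectors (the companion load from b[i] is handled separately by
   vectorizable_load).  */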
7236 static bool
7237 vectorizable_store (vec_info *vinfo,
7238 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7239 gimple **vec_stmt, slp_tree slp_node,
7240 stmt_vector_for_cost *cost_vec)
7242 tree data_ref;
7243 tree op;
7244 tree vec_oprnd = NULL_TREE;
7245 tree elem_type;
7246 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7247 class loop *loop = NULL;
7248 machine_mode vec_mode;
7249 tree dummy;
7250 enum vect_def_type rhs_dt = vect_unknown_def_type;
7251 enum vect_def_type mask_dt = vect_unknown_def_type;
7252 tree dataref_ptr = NULL_TREE;
7253 tree dataref_offset = NULL_TREE;
7254 gimple *ptr_incr = NULL;
7255 int ncopies;
7256 int j;
7257 stmt_vec_info first_stmt_info;
7258 bool grouped_store;
7259 unsigned int group_size, i;
7260 vec<tree> oprnds = vNULL;
7261 vec<tree> result_chain = vNULL;
7262 vec<tree> vec_oprnds = vNULL;
7263 bool slp = (slp_node != NULL);
7264 unsigned int vec_num;
7265 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7266 tree aggr_type;
7267 gather_scatter_info gs_info;
7268 poly_uint64 vf;
7269 vec_load_store_type vls_type;
7270 tree ref_type;
7272 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7273 return false;
7275 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7276 && ! vec_stmt)
7277 return false;
7279 /* Is vectorizable store? */
7281 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7282 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7284 tree scalar_dest = gimple_assign_lhs (assign);
7285 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7286 && is_pattern_stmt_p (stmt_info))
7287 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7288 if (TREE_CODE (scalar_dest) != ARRAY_REF
7289 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7290 && TREE_CODE (scalar_dest) != INDIRECT_REF
7291 && TREE_CODE (scalar_dest) != COMPONENT_REF
7292 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7293 && TREE_CODE (scalar_dest) != REALPART_EXPR
7294 && TREE_CODE (scalar_dest) != MEM_REF)
7295 return false;
7297 else
7299 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7300 if (!call || !gimple_call_internal_p (call))
7301 return false;
7303 internal_fn ifn = gimple_call_internal_fn (call);
7304 if (!internal_store_fn_p (ifn))
7305 return false;
7307 if (slp_node != NULL)
7309 if (dump_enabled_p ())
7310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7311 "SLP of masked stores not supported.\n");
7312 return false;
7315 int mask_index = internal_fn_mask_index (ifn);
7316 if (mask_index >= 0
7317 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7318 &mask, NULL, &mask_dt, &mask_vectype))
7319 return false;
7322 op = vect_get_store_rhs (stmt_info);
7324 /* Cannot have hybrid store SLP -- that would mean storing to the
7325 same location twice. */
7326 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7328 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7329 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7331 if (loop_vinfo)
7333 loop = LOOP_VINFO_LOOP (loop_vinfo);
7334 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7336 else
7337 vf = 1;
7339 /* Multiple types in SLP are handled by creating the appropriate number of
7340 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7341 case of SLP. */
7342 if (slp)
7343 ncopies = 1;
7344 else
7345 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7347 gcc_assert (ncopies >= 1);
7349 /* FORNOW. This restriction should be relaxed. */
7350 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7352 if (dump_enabled_p ())
7353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7354 "multiple types in nested loop.\n");
7355 return false;
7358 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7359 op, &rhs_dt, &rhs_vectype, &vls_type))
7360 return false;
7362 elem_type = TREE_TYPE (vectype);
7363 vec_mode = TYPE_MODE (vectype);
7365 if (!STMT_VINFO_DATA_REF (stmt_info))
7366 return false;
7368 vect_memory_access_type memory_access_type;
7369 enum dr_alignment_support alignment_support_scheme;
7370 int misalignment;
7371 poly_int64 poffset;
7372 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7373 ncopies, &memory_access_type, &poffset,
7374 &alignment_support_scheme, &misalignment, &gs_info))
7375 return false;
7377 if (mask)
7379 if (memory_access_type == VMAT_CONTIGUOUS)
7381 if (!VECTOR_MODE_P (vec_mode)
7382 || !can_vec_mask_load_store_p (vec_mode,
7383 TYPE_MODE (mask_vectype), false))
7384 return false;
7386 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7387 && (memory_access_type != VMAT_GATHER_SCATTER
7388 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7390 if (dump_enabled_p ())
7391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7392 "unsupported access type for masked store.\n");
7393 return false;
7396 else
7398 /* FORNOW. In some cases can vectorize even if data-type not supported
7399 (e.g. - array initialization with 0). */
7400 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7401 return false;
7404 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7405 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7406 && memory_access_type != VMAT_GATHER_SCATTER
7407 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7408 if (grouped_store)
7410 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7411 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7412 group_size = DR_GROUP_SIZE (first_stmt_info);
7414 else
7416 first_stmt_info = stmt_info;
7417 first_dr_info = dr_info;
7418 group_size = vec_num = 1;
7421 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7423 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7424 memory_access_type))
7425 return false;
7428 if (!vec_stmt) /* transformation not required. */
7430 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7432 if (loop_vinfo
7433 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7434 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7435 group_size, memory_access_type,
7436 &gs_info, mask);
7438 if (slp_node
7439 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7440 vectype))
7442 if (dump_enabled_p ())
7443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7444 "incompatible vector types for invariants\n");
7445 return false;
7448 if (dump_enabled_p ()
7449 && memory_access_type != VMAT_ELEMENTWISE
7450 && memory_access_type != VMAT_GATHER_SCATTER
7451 && alignment_support_scheme != dr_aligned)
7452 dump_printf_loc (MSG_NOTE, vect_location,
7453 "Vectorizing an unaligned access.\n");
7455 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7456 vect_model_store_cost (vinfo, stmt_info, ncopies,
7457 memory_access_type, alignment_support_scheme,
7458 misalignment, vls_type, slp_node, cost_vec);
7459 return true;
7461 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7463 /* Transform. */
7465 ensure_base_align (dr_info);
7467 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7469 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7470 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7471 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7472 tree ptr, var, scale, vec_mask;
7473 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7474 tree mask_halfvectype = mask_vectype;
7475 edge pe = loop_preheader_edge (loop);
7476 gimple_seq seq;
7477 basic_block new_bb;
7478 enum { NARROW, NONE, WIDEN } modifier;
7479 poly_uint64 scatter_off_nunits
7480 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7482 if (known_eq (nunits, scatter_off_nunits))
7483 modifier = NONE;
7484 else if (known_eq (nunits * 2, scatter_off_nunits))
7486 modifier = WIDEN;
7488 /* Currently gathers and scatters are only supported for
7489 fixed-length vectors. */
7490 unsigned int count = scatter_off_nunits.to_constant ();
7491 vec_perm_builder sel (count, count, 1);
7492 for (i = 0; i < (unsigned int) count; ++i)
7493 sel.quick_push (i | (count / 2));
7495 vec_perm_indices indices (sel, 1, count);
7496 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7497 indices);
7498 gcc_assert (perm_mask != NULL_TREE);
7500 else if (known_eq (nunits, scatter_off_nunits * 2))
7502 modifier = NARROW;
7504 /* Currently gathers and scatters are only supported for
7505 fixed-length vectors. */
7506 unsigned int count = nunits.to_constant ();
7507 vec_perm_builder sel (count, count, 1);
7508 for (i = 0; i < (unsigned int) count; ++i)
7509 sel.quick_push (i | (count / 2));
7511 vec_perm_indices indices (sel, 2, count);
7512 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7513 gcc_assert (perm_mask != NULL_TREE);
7514 ncopies *= 2;
7516 if (mask)
7517 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7519 else
7520 gcc_unreachable ();
7522 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7523 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7524 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7525 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7526 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7527 scaletype = TREE_VALUE (arglist);
7529 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7530 && TREE_CODE (rettype) == VOID_TYPE);
7532 ptr = fold_convert (ptrtype, gs_info.base);
7533 if (!is_gimple_min_invariant (ptr))
7535 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7536 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7537 gcc_assert (!new_bb);
7540 if (mask == NULL_TREE)
7542 mask_arg = build_int_cst (masktype, -1);
7543 mask_arg = vect_init_vector (vinfo, stmt_info,
7544 mask_arg, masktype, NULL);
7547 scale = build_int_cst (scaletype, gs_info.scale);
7549 auto_vec<tree> vec_oprnds0;
7550 auto_vec<tree> vec_oprnds1;
7551 auto_vec<tree> vec_masks;
7552 if (mask)
7554 tree mask_vectype = truth_type_for (vectype);
7555 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7556 modifier == NARROW
7557 ? ncopies / 2 : ncopies,
7558 mask, &vec_masks, mask_vectype);
7560 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7561 modifier == WIDEN
7562 ? ncopies / 2 : ncopies,
7563 gs_info.offset, &vec_oprnds0);
7564 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7565 modifier == NARROW
7566 ? ncopies / 2 : ncopies,
7567 op, &vec_oprnds1);
7568 for (j = 0; j < ncopies; ++j)
7570 if (modifier == WIDEN)
7572 if (j & 1)
7573 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7574 perm_mask, stmt_info, gsi);
7575 else
7576 op = vec_oprnd0 = vec_oprnds0[j / 2];
7577 src = vec_oprnd1 = vec_oprnds1[j];
7578 if (mask)
7579 mask_op = vec_mask = vec_masks[j];
7581 else if (modifier == NARROW)
7583 if (j & 1)
7584 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7585 perm_mask, stmt_info, gsi);
7586 else
7587 src = vec_oprnd1 = vec_oprnds1[j / 2];
7588 op = vec_oprnd0 = vec_oprnds0[j];
7589 if (mask)
7590 mask_op = vec_mask = vec_masks[j / 2];
7592 else
7594 op = vec_oprnd0 = vec_oprnds0[j];
7595 src = vec_oprnd1 = vec_oprnds1[j];
7596 if (mask)
7597 mask_op = vec_mask = vec_masks[j];
7600 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7602 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7603 TYPE_VECTOR_SUBPARTS (srctype)));
7604 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7605 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7606 gassign *new_stmt
7607 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7608 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7609 src = var;
7612 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7614 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7615 TYPE_VECTOR_SUBPARTS (idxtype)));
7616 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7617 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7618 gassign *new_stmt
7619 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7620 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7621 op = var;
7624 if (mask)
7626 tree utype;
7627 mask_arg = mask_op;
7628 if (modifier == NARROW)
7630 var = vect_get_new_ssa_name (mask_halfvectype,
7631 vect_simple_var);
7632 gassign *new_stmt
7633 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7634 : VEC_UNPACK_LO_EXPR,
7635 mask_op);
7636 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7637 mask_arg = var;
7639 tree optype = TREE_TYPE (mask_arg);
7640 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7641 utype = masktype;
7642 else
7643 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7644 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7645 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7646 gassign *new_stmt
7647 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7648 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7649 mask_arg = var;
7650 if (!useless_type_conversion_p (masktype, utype))
7652 gcc_assert (TYPE_PRECISION (utype)
7653 <= TYPE_PRECISION (masktype));
7654 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7655 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7656 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7657 mask_arg = var;
7661 gcall *new_stmt
7662 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7663 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7665 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7667 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7668 return true;
7670 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7671 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7673 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7674 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7676 if (grouped_store)
7678 /* FORNOW */
7679 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7681 /* We vectorize all the stmts of the interleaving group when we
7682 reach the last stmt in the group. */
7683 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7684 < DR_GROUP_SIZE (first_stmt_info)
7685 && !slp)
7687 *vec_stmt = NULL;
7688 return true;
7691 if (slp)
7693 grouped_store = false;
7694 /* VEC_NUM is the number of vect stmts to be created for this
7695 group. */
7696 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7697 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7698 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7699 == first_stmt_info);
7700 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7701 op = vect_get_store_rhs (first_stmt_info);
7703 else
7704 /* VEC_NUM is the number of vect stmts to be created for this
7705 group. */
7706 vec_num = group_size;
7708 ref_type = get_group_alias_ptr_type (first_stmt_info);
7710 else
7711 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7713 if (dump_enabled_p ())
7714 dump_printf_loc (MSG_NOTE, vect_location,
7715 "transform store. ncopies = %d\n", ncopies);
7717 if (memory_access_type == VMAT_ELEMENTWISE
7718 || memory_access_type == VMAT_STRIDED_SLP)
7720 gimple_stmt_iterator incr_gsi;
7721 bool insert_after;
7722 gimple *incr;
7723 tree offvar;
7724 tree ivstep;
7725 tree running_off;
7726 tree stride_base, stride_step, alias_off;
7727 tree vec_oprnd;
7728 tree dr_offset;
7729 unsigned int g;
7730 /* Checked by get_load_store_type. */
7731 unsigned int const_nunits = nunits.to_constant ();
7733 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7734 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7736 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7737 stride_base
7738 = fold_build_pointer_plus
7739 (DR_BASE_ADDRESS (first_dr_info->dr),
7740 size_binop (PLUS_EXPR,
7741 convert_to_ptrofftype (dr_offset),
7742 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7743 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7745 /* For a store with loop-invariant (but other than power-of-2)
7746 stride (i.e. not a grouped access) like so:
7748 for (i = 0; i < n; i += stride)
7749 array[i] = ...;
7751 we generate a new induction variable and new stores from
7752 the components of the (vectorized) rhs:
7754 for (j = 0; ; j += VF*stride)
7755 vectemp = ...;
7756 tmp1 = vectemp[0];
7757 array[j] = tmp1;
7758 tmp2 = vectemp[1];
7759 array[j + stride] = tmp2;
7763 unsigned nstores = const_nunits;
7764 unsigned lnel = 1;
7765 tree ltype = elem_type;
7766 tree lvectype = vectype;
7767 if (slp)
7769 if (group_size < const_nunits
7770 && const_nunits % group_size == 0)
7772 nstores = const_nunits / group_size;
7773 lnel = group_size;
7774 ltype = build_vector_type (elem_type, group_size);
7775 lvectype = vectype;
7777 /* First check if vec_extract optab doesn't support extraction
7778 of vector elts directly. */
7779 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7780 machine_mode vmode;
7781 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7782 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7783 group_size).exists (&vmode)
7784 || (convert_optab_handler (vec_extract_optab,
7785 TYPE_MODE (vectype), vmode)
7786 == CODE_FOR_nothing))
7788 /* Try to avoid emitting an extract of vector elements
7789 by performing the extracts using an integer type of the
7790 same size, extracting from a vector of those and then
7791 re-interpreting it as the original vector type if
7792 supported. */
7793 unsigned lsize
7794 = group_size * GET_MODE_BITSIZE (elmode);
7795 unsigned int lnunits = const_nunits / group_size;
7796 /* If we can't construct such a vector fall back to
7797 element extracts from the original vector type and
7798 element size stores. */
7799 if (int_mode_for_size (lsize, 0).exists (&elmode)
7800 && VECTOR_MODE_P (TYPE_MODE (vectype))
7801 && related_vector_mode (TYPE_MODE (vectype), elmode,
7802 lnunits).exists (&vmode)
7803 && (convert_optab_handler (vec_extract_optab,
7804 vmode, elmode)
7805 != CODE_FOR_nothing))
7807 nstores = lnunits;
7808 lnel = group_size;
7809 ltype = build_nonstandard_integer_type (lsize, 1);
7810 lvectype = build_vector_type (ltype, nstores);
7812 /* Else fall back to vector extraction anyway.
7813 Fewer stores are more important than avoiding spilling
7814 of the vector we extract from. Compared to the
7815 construction case in vectorizable_load no store-forwarding
7816 issue exists here for reasonable archs. */
7819 else if (group_size >= const_nunits
7820 && group_size % const_nunits == 0)
7822 nstores = 1;
7823 lnel = const_nunits;
7824 ltype = vectype;
7825 lvectype = vectype;
7827 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7828 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7831 ivstep = stride_step;
7832 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7833 build_int_cst (TREE_TYPE (ivstep), vf));
7835 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7837 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7838 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7839 create_iv (stride_base, ivstep, NULL,
7840 loop, &incr_gsi, insert_after,
7841 &offvar, NULL);
7842 incr = gsi_stmt (incr_gsi);
7844 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7846 alias_off = build_int_cst (ref_type, 0);
7847 stmt_vec_info next_stmt_info = first_stmt_info;
7848 for (g = 0; g < group_size; g++)
7850 running_off = offvar;
7851 if (g)
7853 tree size = TYPE_SIZE_UNIT (ltype);
7854 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7855 size);
7856 tree newoff = copy_ssa_name (running_off, NULL);
7857 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7858 running_off, pos);
7859 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7860 running_off = newoff;
7862 if (!slp)
7863 op = vect_get_store_rhs (next_stmt_info);
7864 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7865 op, &vec_oprnds);
7866 unsigned int group_el = 0;
7867 unsigned HOST_WIDE_INT
7868 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7869 for (j = 0; j < ncopies; j++)
7871 vec_oprnd = vec_oprnds[j];
7872 /* Pun the vector to extract from if necessary. */
7873 if (lvectype != vectype)
7875 tree tem = make_ssa_name (lvectype);
7876 gimple *pun
7877 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7878 lvectype, vec_oprnd));
7879 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7880 vec_oprnd = tem;
7882 for (i = 0; i < nstores; i++)
7884 tree newref, newoff;
7885 gimple *incr, *assign;
7886 tree size = TYPE_SIZE (ltype);
7887 /* Extract the i'th component. */
7888 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7889 bitsize_int (i), size);
7890 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7891 size, pos);
7893 elem = force_gimple_operand_gsi (gsi, elem, true,
7894 NULL_TREE, true,
7895 GSI_SAME_STMT);
7897 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7898 group_el * elsz);
7899 newref = build2 (MEM_REF, ltype,
7900 running_off, this_off);
7901 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7903 /* And store it to *running_off. */
7904 assign = gimple_build_assign (newref, elem);
7905 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7907 group_el += lnel;
7908 if (! slp
7909 || group_el == group_size)
7911 newoff = copy_ssa_name (running_off, NULL);
7912 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7913 running_off, stride_step);
7914 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7916 running_off = newoff;
7917 group_el = 0;
7919 if (g == group_size - 1
7920 && !slp)
7922 if (j == 0 && i == 0)
7923 *vec_stmt = assign;
7924 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7928 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7929 vec_oprnds.release ();
7930 if (slp)
7931 break;
7934 return true;
7937 auto_vec<tree> dr_chain (group_size);
7938 oprnds.create (group_size);
7940 gcc_assert (alignment_support_scheme);
7941 vec_loop_masks *loop_masks
7942 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7943 ? &LOOP_VINFO_MASKS (loop_vinfo)
7944 : NULL);
7945 vec_loop_lens *loop_lens
7946 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7947 ? &LOOP_VINFO_LENS (loop_vinfo)
7948 : NULL);
7950 /* Shouldn't go with length-based approach if fully masked. */
7951 gcc_assert (!loop_lens || !loop_masks);
7953 /* Targets with store-lane instructions must not require explicit
7954 realignment. vect_supportable_dr_alignment always returns either
7955 dr_aligned or dr_unaligned_supported for masked operations. */
7956 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7957 && !mask
7958 && !loop_masks)
7959 || alignment_support_scheme == dr_aligned
7960 || alignment_support_scheme == dr_unaligned_supported);
7962 tree offset = NULL_TREE;
7963 if (!known_eq (poffset, 0))
7964 offset = size_int (poffset);
7966 tree bump;
7967 tree vec_offset = NULL_TREE;
7968 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7970 aggr_type = NULL_TREE;
7971 bump = NULL_TREE;
7973 else if (memory_access_type == VMAT_GATHER_SCATTER)
7975 aggr_type = elem_type;
7976 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7977 &bump, &vec_offset);
7979 else
7981 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7982 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7983 else
7984 aggr_type = vectype;
7985 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7986 memory_access_type);
7989 if (mask)
7990 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7992 /* In case the vectorization factor (VF) is bigger than the number
7993 of elements that we can fit in a vectype (nunits), we have to generate
7994 more than one vector stmt - i.e. we need to "unroll" the
7995 vector stmt by a factor of VF/nunits. */
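/* For example (illustrative): with VF = 8 and a vectype holding
   nunits = 4 elements, ncopies = 2 vector stmts are emitted per
   scalar stmt.  */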
7997 /* In case of interleaving (non-unit grouped access):
7999 S1: &base + 2 = x2
8000 S2: &base = x0
8001 S3: &base + 1 = x1
8002 S4: &base + 3 = x3
8004 We create vectorized stores starting from base address (the access of the
8005 first stmt in the chain (S2 in the above example), when the last store stmt
8006 of the chain (S4) is reached:
8008 VS1: &base = vx2
8009 VS2: &base + vec_size*1 = vx0
8010 VS3: &base + vec_size*2 = vx1
8011 VS4: &base + vec_size*3 = vx3
8013 Then permutation statements are generated:
8015 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8016 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8019 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8020 (the order of the data-refs in the output of vect_permute_store_chain
8021 corresponds to the order of scalar stmts in the interleaving chain - see
8022 the documentation of vect_permute_store_chain()).
8024 In case of both multiple types and interleaving, above vector stores and
8025 permutation stmts are created for every copy. The result vector stmts are
8026 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8027 STMT_VINFO_RELATED_STMT for the next copies.
8030 auto_vec<tree> vec_masks;
8031 tree vec_mask = NULL;
8032 auto_vec<tree> vec_offsets;
8033 auto_vec<vec<tree> > gvec_oprnds;
8034 gvec_oprnds.safe_grow_cleared (group_size, true);
8035 for (j = 0; j < ncopies; j++)
8037 gimple *new_stmt;
8038 if (j == 0)
8040 if (slp)
8042 /* Get vectorized arguments for SLP_NODE. */
8043 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8044 op, &vec_oprnds);
8045 vec_oprnd = vec_oprnds[0];
8047 else
8049 /* For interleaved stores we collect vectorized defs for all the
8050 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8051 used as an input to vect_permute_store_chain().
8053 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8054 and OPRNDS are of size 1. */
8055 stmt_vec_info next_stmt_info = first_stmt_info;
8056 for (i = 0; i < group_size; i++)
8058 /* Since gaps are not supported for interleaved stores,
8059 DR_GROUP_SIZE is the exact number of stmts in the chain.
8060 Therefore, NEXT_STMT_INFO can't be NULL_TREE. If there
8061 is no interleaving, DR_GROUP_SIZE is 1,
8062 and only one iteration of the loop will be executed. */
8063 op = vect_get_store_rhs (next_stmt_info);
8064 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8065 ncopies, op, &gvec_oprnds[i]);
8066 vec_oprnd = gvec_oprnds[i][0];
8067 dr_chain.quick_push (gvec_oprnds[i][0]);
8068 oprnds.quick_push (gvec_oprnds[i][0]);
8069 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8071 if (mask)
8073 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8074 mask, &vec_masks, mask_vectype);
8075 vec_mask = vec_masks[0];
8079 /* We should have caught mismatched types earlier. */
8080 gcc_assert (useless_type_conversion_p (vectype,
8081 TREE_TYPE (vec_oprnd)));
8082 bool simd_lane_access_p
8083 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8084 if (simd_lane_access_p
8085 && !loop_masks
8086 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8087 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8088 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8089 && integer_zerop (DR_INIT (first_dr_info->dr))
8090 && alias_sets_conflict_p (get_alias_set (aggr_type),
8091 get_alias_set (TREE_TYPE (ref_type))))
8093 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8094 dataref_offset = build_int_cst (ref_type, 0);
8096 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8098 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8099 &gs_info, &dataref_ptr,
8100 &vec_offsets);
8101 vec_offset = vec_offsets[0];
8103 else
8104 dataref_ptr
8105 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8106 simd_lane_access_p ? loop : NULL,
8107 offset, &dummy, gsi, &ptr_incr,
8108 simd_lane_access_p, NULL_TREE, bump);
8110 else
8112 /* For interleaved stores we created vectorized defs for all the
8113 defs stored in OPRNDS in the previous iteration (previous copy).
8114 DR_CHAIN is then used as an input to vect_permute_store_chain().
8115 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8116 OPRNDS are of size 1. */
8117 for (i = 0; i < group_size; i++)
8119 vec_oprnd = gvec_oprnds[i][j];
8120 dr_chain[i] = gvec_oprnds[i][j];
8121 oprnds[i] = gvec_oprnds[i][j];
8123 if (mask)
8124 vec_mask = vec_masks[j];
8125 if (dataref_offset)
8126 dataref_offset
8127 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8128 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8129 vec_offset = vec_offsets[j];
8130 else
8131 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8132 stmt_info, bump);
8135 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8137 tree vec_array;
8139 /* Get an array into which we can store the individual vectors. */
8140 vec_array = create_vector_array (vectype, vec_num);
8142 /* Invalidate the current contents of VEC_ARRAY. This should
8143 become an RTL clobber too, which prevents the vector registers
8144 from being upward-exposed. */
8145 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8147 /* Store the individual vectors into the array. */
8148 for (i = 0; i < vec_num; i++)
8150 vec_oprnd = dr_chain[i];
8151 write_vector_array (vinfo, stmt_info,
8152 gsi, vec_oprnd, vec_array, i);
8155 tree final_mask = NULL;
8156 if (loop_masks)
8157 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8158 vectype, j);
8159 if (vec_mask)
8160 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8161 vec_mask, gsi);
8163 gcall *call;
8164 if (final_mask)
8166 /* Emit:
8167 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8168 VEC_ARRAY). */
8169 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8170 tree alias_ptr = build_int_cst (ref_type, align);
8171 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8172 dataref_ptr, alias_ptr,
8173 final_mask, vec_array);
8175 else
8177 /* Emit:
8178 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8179 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8180 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8181 vec_array);
8182 gimple_call_set_lhs (call, data_ref);
8184 gimple_call_set_nothrow (call, true);
8185 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8186 new_stmt = call;
8188 /* Record that VEC_ARRAY is now dead. */
8189 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8191 else
8193 new_stmt = NULL;
8194 if (grouped_store)
8196 if (j == 0)
8197 result_chain.create (group_size);
8198 /* Permute. */
8199 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8200 gsi, &result_chain);
8203 stmt_vec_info next_stmt_info = first_stmt_info;
8204 for (i = 0; i < vec_num; i++)
8206 unsigned misalign;
8207 unsigned HOST_WIDE_INT align;
8209 tree final_mask = NULL_TREE;
8210 if (loop_masks)
8211 final_mask = vect_get_loop_mask (gsi, loop_masks,
8212 vec_num * ncopies,
8213 vectype, vec_num * j + i);
8214 if (vec_mask)
8215 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8216 vec_mask, gsi);
8218 if (memory_access_type == VMAT_GATHER_SCATTER)
8220 tree scale = size_int (gs_info.scale);
8221 gcall *call;
8222 if (final_mask)
8223 call = gimple_build_call_internal
8224 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8225 scale, vec_oprnd, final_mask);
8226 else
8227 call = gimple_build_call_internal
8228 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8229 scale, vec_oprnd);
8230 gimple_call_set_nothrow (call, true);
8231 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8232 new_stmt = call;
8233 break;
8236 if (i > 0)
8237 /* Bump the vector pointer. */
8238 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8239 gsi, stmt_info, bump);
8241 if (slp)
8242 vec_oprnd = vec_oprnds[i];
8243 else if (grouped_store)
8244 /* For grouped stores vectorized defs are interleaved in
8245 vect_permute_store_chain(). */
8246 vec_oprnd = result_chain[i];
8248 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8249 if (alignment_support_scheme == dr_aligned)
8251 gcc_assert (aligned_access_p (first_dr_info, vectype));
8252 misalign = 0;
8254 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8256 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8257 misalign = 0;
8259 else
8260 misalign = misalignment;
8261 if (dataref_offset == NULL_TREE
8262 && TREE_CODE (dataref_ptr) == SSA_NAME)
8263 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8264 misalign);
8265 align = least_bit_hwi (misalign | align);
8267 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8269 tree perm_mask = perm_mask_for_reverse (vectype);
8270 tree perm_dest = vect_create_destination_var
8271 (vect_get_store_rhs (stmt_info), vectype);
8272 tree new_temp = make_ssa_name (perm_dest);
8274 /* Generate the permute statement. */
8275 gimple *perm_stmt
8276 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8277 vec_oprnd, perm_mask);
8278 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8280 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8281 vec_oprnd = new_temp;
8284 /* Arguments are ready. Create the new vector stmt. */
8285 if (final_mask)
8287 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8288 gcall *call
8289 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8290 dataref_ptr, ptr,
8291 final_mask, vec_oprnd);
8292 gimple_call_set_nothrow (call, true);
8293 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8294 new_stmt = call;
8296 else if (loop_lens)
8298 tree final_len
8299 = vect_get_loop_len (loop_vinfo, loop_lens,
8300 vec_num * ncopies, vec_num * j + i);
8301 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8302 machine_mode vmode = TYPE_MODE (vectype);
8303 opt_machine_mode new_ovmode
8304 = get_len_load_store_mode (vmode, false);
8305 machine_mode new_vmode = new_ovmode.require ();
8306 /* Need conversion if it's wrapped with VnQI. */
8307 if (vmode != new_vmode)
8309 tree new_vtype
8310 = build_vector_type_for_mode (unsigned_intQI_type_node,
8311 new_vmode);
8312 tree var
8313 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8314 vec_oprnd
8315 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8316 gassign *new_stmt
8317 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8318 vec_oprnd);
8319 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8320 gsi);
8321 vec_oprnd = var;
8323 gcall *call
8324 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8325 ptr, final_len, vec_oprnd);
8326 gimple_call_set_nothrow (call, true);
8327 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8328 new_stmt = call;
8330 else
8332 data_ref = fold_build2 (MEM_REF, vectype,
8333 dataref_ptr,
8334 dataref_offset
8335 ? dataref_offset
8336 : build_int_cst (ref_type, 0));
8337 if (alignment_support_scheme == dr_aligned)
8338 gcc_assert (aligned_access_p (first_dr_info, vectype));
8339 else
8340 TREE_TYPE (data_ref)
8341 = build_aligned_type (TREE_TYPE (data_ref),
8342 align * BITS_PER_UNIT);
8343 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8344 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8345 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8348 if (slp)
8349 continue;
8351 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8352 if (!next_stmt_info)
8353 break;
8356 if (!slp)
8358 if (j == 0)
8359 *vec_stmt = new_stmt;
8360 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8364 for (i = 0; i < group_size; ++i)
8366 vec<tree> oprndsi = gvec_oprnds[i];
8367 oprndsi.release ();
8369 oprnds.release ();
8370 result_chain.release ();
8371 vec_oprnds.release ();
8373 return true;
8376 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8377 VECTOR_CST mask. No checks are made that the target platform supports the
8378 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8379 vect_gen_perm_mask_checked. */
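/* A minimal usage sketch building a reverse permutation, mirroring the
   callers elsewhere in this file and assuming a constant number of
   lanes NUNITS:

	vec_perm_builder sel (nunits, nunits, 1);
	for (unsigned int i = 0; i < nunits; ++i)
	  sel.quick_push (nunits - 1 - i);
	vec_perm_indices indices (sel, 1, nunits);
	tree mask = vect_gen_perm_mask_any (vectype, indices);

   The result is a VECTOR_CST with ssizetype elements, suitable as the
   third operand of a VEC_PERM_EXPR.  */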
8381 tree
8382 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8384 tree mask_type;
8386 poly_uint64 nunits = sel.length ();
8387 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8389 mask_type = build_vector_type (ssizetype, nunits);
8390 return vec_perm_indices_to_tree (mask_type, sel);
8393 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8394 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8396 tree
8397 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8399 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8400 return vect_gen_perm_mask_any (vectype, sel);
8403 /* Given vector variables X and Y that were generated for the scalar
8404 STMT_INFO, generate instructions to permute the vector elements of X and Y
8405 using permutation mask MASK_VEC, insert them at *GSI and return the
8406 permuted vector variable. */
8408 static tree
8409 permute_vec_elements (vec_info *vinfo,
8410 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8411 gimple_stmt_iterator *gsi)
8413 tree vectype = TREE_TYPE (x);
8414 tree perm_dest, data_ref;
8415 gimple *perm_stmt;
8417 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8418 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8419 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8420 else
8421 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8422 data_ref = make_ssa_name (perm_dest);
8424 /* Generate the permute statement. */
8425 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8426 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8428 return data_ref;
8431 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8432 inserting them on the loop's preheader edge. Returns true if we
8433 were successful in doing so (and thus STMT_INFO can then be moved),
8434 otherwise returns false. */
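/* Illustrative example: if the address fed to an invariant load is
   itself computed by a loop-internal statement, e.g.

	q_1 = &p_2->x;
	... = *q_1;

   then the address computation is moved to the preheader here so that
   the load can subsequently be materialized there as well.  */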
8436 static bool
8437 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8439 ssa_op_iter i;
8440 tree op;
8441 bool any = false;
8443 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8445 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8446 if (!gimple_nop_p (def_stmt)
8447 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8449 /* Make sure we don't need to recurse. While we could do
8450 so in simple cases, for more complex use webs we don't
8451 have an easy way to preserve stmt order to fulfil
8452 dependencies within them. */
8453 tree op2;
8454 ssa_op_iter i2;
8455 if (gimple_code (def_stmt) == GIMPLE_PHI)
8456 return false;
8457 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8459 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8460 if (!gimple_nop_p (def_stmt2)
8461 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8462 return false;
8464 any = true;
8468 if (!any)
8469 return true;
8471 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8473 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8474 if (!gimple_nop_p (def_stmt)
8475 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8477 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8478 gsi_remove (&gsi, false);
8479 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8483 return true;
8486 /* vectorizable_load.
8488 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8489 that can be vectorized.
8490 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8491 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8492 Return true if STMT_INFO is vectorizable in this way. */
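/* Illustrative sketch only: a typical candidate is the load in

	for (i = 0; i < n; i++)
	  sum += a[i];

   which, when the checks below succeed, becomes whole-vector loads in
   the vectorized loop (the reduction itself is handled elsewhere).  */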
8494 static bool
8495 vectorizable_load (vec_info *vinfo,
8496 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8497 gimple **vec_stmt, slp_tree slp_node,
8498 stmt_vector_for_cost *cost_vec)
8500 tree scalar_dest;
8501 tree vec_dest = NULL;
8502 tree data_ref = NULL;
8503 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8504 class loop *loop = NULL;
8505 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8506 bool nested_in_vect_loop = false;
8507 tree elem_type;
8508 tree new_temp;
8509 machine_mode mode;
8510 tree dummy;
8511 tree dataref_ptr = NULL_TREE;
8512 tree dataref_offset = NULL_TREE;
8513 gimple *ptr_incr = NULL;
8514 int ncopies;
8515 int i, j;
8516 unsigned int group_size;
8517 poly_uint64 group_gap_adj;
8518 tree msq = NULL_TREE, lsq;
8519 tree byte_offset = NULL_TREE;
8520 tree realignment_token = NULL_TREE;
8521 gphi *phi = NULL;
8522 vec<tree> dr_chain = vNULL;
8523 bool grouped_load = false;
8524 stmt_vec_info first_stmt_info;
8525 stmt_vec_info first_stmt_info_for_drptr = NULL;
8526 bool compute_in_loop = false;
8527 class loop *at_loop;
8528 int vec_num;
8529 bool slp = (slp_node != NULL);
8530 bool slp_perm = false;
8531 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8532 poly_uint64 vf;
8533 tree aggr_type;
8534 gather_scatter_info gs_info;
8535 tree ref_type;
8536 enum vect_def_type mask_dt = vect_unknown_def_type;
8538 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8539 return false;
8541 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8542 && ! vec_stmt)
8543 return false;
8545 if (!STMT_VINFO_DATA_REF (stmt_info))
8546 return false;
8548 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8549 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8551 scalar_dest = gimple_assign_lhs (assign);
8552 if (TREE_CODE (scalar_dest) != SSA_NAME)
8553 return false;
8555 tree_code code = gimple_assign_rhs_code (assign);
8556 if (code != ARRAY_REF
8557 && code != BIT_FIELD_REF
8558 && code != INDIRECT_REF
8559 && code != COMPONENT_REF
8560 && code != IMAGPART_EXPR
8561 && code != REALPART_EXPR
8562 && code != MEM_REF
8563 && TREE_CODE_CLASS (code) != tcc_declaration)
8564 return false;
8566 else
8568 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8569 if (!call || !gimple_call_internal_p (call))
8570 return false;
8572 internal_fn ifn = gimple_call_internal_fn (call);
8573 if (!internal_load_fn_p (ifn))
8574 return false;
8576 scalar_dest = gimple_call_lhs (call);
8577 if (!scalar_dest)
8578 return false;
8580 int mask_index = internal_fn_mask_index (ifn);
8581 if (mask_index >= 0
8582 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node,
8583 /* ??? For SLP we only have operands for
8584 the mask operand. */
8585 slp_node ? 0 : mask_index,
8586 &mask, NULL, &mask_dt, &mask_vectype))
8587 return false;
8590 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8591 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8593 if (loop_vinfo)
8595 loop = LOOP_VINFO_LOOP (loop_vinfo);
8596 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8597 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8599 else
8600 vf = 1;
8602 /* Multiple types in SLP are handled by creating the appropriate number of
8603 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8604 case of SLP. */
8605 if (slp)
8606 ncopies = 1;
8607 else
8608 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8610 gcc_assert (ncopies >= 1);
8612 /* FORNOW. This restriction should be relaxed. */
8613 if (nested_in_vect_loop && ncopies > 1)
8615 if (dump_enabled_p ())
8616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8617 "multiple types in nested loop.\n");
8618 return false;
8621 /* Invalidate assumptions made by dependence analysis when vectorization
8622 on the unrolled body effectively re-orders stmts. */
8623 if (ncopies > 1
8624 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8625 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8626 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8628 if (dump_enabled_p ())
8629 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8630 "cannot perform implicit CSE when unrolling "
8631 "with negative dependence distance\n");
8632 return false;
8635 elem_type = TREE_TYPE (vectype);
8636 mode = TYPE_MODE (vectype);
8638 /* FORNOW. In some cases can vectorize even if data-type not supported
8639 (e.g. - data copies). */
8640 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8642 if (dump_enabled_p ())
8643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8644 "Aligned load, but unsupported type.\n");
8645 return false;
8648 /* Check if the load is a part of an interleaving chain. */
8649 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8651 grouped_load = true;
8652 /* FORNOW */
8653 gcc_assert (!nested_in_vect_loop);
8654 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8656 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8657 group_size = DR_GROUP_SIZE (first_stmt_info);
8659 /* Refuse non-SLP vectorization of SLP-only groups. */
8660 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8662 if (dump_enabled_p ())
8663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8664 "cannot vectorize load in non-SLP mode.\n");
8665 return false;
8668 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8670 slp_perm = true;
8672 if (!loop_vinfo)
8674 /* In BB vectorization we may not actually use a loaded vector
8675 accessing elements in excess of DR_GROUP_SIZE. */
8676 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8677 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8678 unsigned HOST_WIDE_INT nunits;
8679 unsigned j, k, maxk = 0;
8680 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8681 if (k > maxk)
8682 maxk = k;
8683 tree vectype = SLP_TREE_VECTYPE (slp_node);
8684 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8685 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8687 if (dump_enabled_p ())
8688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8689 "BB vectorization with gaps at the end of "
8690 "a load is not supported\n");
8691 return false;
8695 auto_vec<tree> tem;
8696 unsigned n_perms;
8697 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8698 true, &n_perms))
8700 if (dump_enabled_p ())
8701 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8702 vect_location,
8703 "unsupported load permutation\n");
8704 return false;
8708 /* Invalidate assumptions made by dependence analysis when vectorization
8709 on the unrolled body effectively re-orders stmts. */
8710 if (!PURE_SLP_STMT (stmt_info)
8711 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8712 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8713 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8715 if (dump_enabled_p ())
8716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8717 "cannot perform implicit CSE when performing "
8718 "group loads with negative dependence distance\n");
8719 return false;
8722 else
8723 group_size = 1;
8725 vect_memory_access_type memory_access_type;
8726 enum dr_alignment_support alignment_support_scheme;
8727 int misalignment;
8728 poly_int64 poffset;
8729 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8730 ncopies, &memory_access_type, &poffset,
8731 &alignment_support_scheme, &misalignment, &gs_info))
8732 return false;
8734 if (mask)
8736 if (memory_access_type == VMAT_CONTIGUOUS)
8738 machine_mode vec_mode = TYPE_MODE (vectype);
8739 if (!VECTOR_MODE_P (vec_mode)
8740 || !can_vec_mask_load_store_p (vec_mode,
8741 TYPE_MODE (mask_vectype), true))
8742 return false;
8744 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8745 && memory_access_type != VMAT_GATHER_SCATTER)
8747 if (dump_enabled_p ())
8748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8749 "unsupported access type for masked load.\n");
8750 return false;
8752 else if (memory_access_type == VMAT_GATHER_SCATTER
8753 && gs_info.ifn == IFN_LAST
8754 && !gs_info.decl)
8756 if (dump_enabled_p ())
8757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8758 "unsupported masked emulated gather.\n");
8759 return false;
8763 if (!vec_stmt) /* transformation not required. */
8765 if (slp_node
8766 && mask
8767 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8768 mask_vectype))
8770 if (dump_enabled_p ())
8771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8772 "incompatible vector types for invariants\n");
8773 return false;
8776 if (!slp)
8777 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8779 if (loop_vinfo
8780 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8781 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8782 group_size, memory_access_type,
8783 &gs_info, mask);
8785 if (dump_enabled_p ()
8786 && memory_access_type != VMAT_ELEMENTWISE
8787 && memory_access_type != VMAT_GATHER_SCATTER
8788 && alignment_support_scheme != dr_aligned)
8789 dump_printf_loc (MSG_NOTE, vect_location,
8790 "Vectorizing an unaligned access.\n");
8792 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8793 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8794 alignment_support_scheme, misalignment,
8795 &gs_info, slp_node, cost_vec);
8796 return true;
8799 if (!slp)
8800 gcc_assert (memory_access_type
8801 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8803 if (dump_enabled_p ())
8804 dump_printf_loc (MSG_NOTE, vect_location,
8805 "transform load. ncopies = %d\n", ncopies);
8807 /* Transform. */
8809 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8810 ensure_base_align (dr_info);
8812 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8814 vect_build_gather_load_calls (vinfo,
8815 stmt_info, gsi, vec_stmt, &gs_info, mask);
8816 return true;
8819 if (memory_access_type == VMAT_INVARIANT)
8821 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8822 /* If we have versioned for aliasing or the loop doesn't
8823 have any data dependencies that would preclude this,
8824 then we are sure this is a loop invariant load and
8825 thus we can insert it on the preheader edge. */
8826 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8827 && !nested_in_vect_loop
8828 && hoist_defs_of_uses (stmt_info, loop));
8829 if (hoist_p)
8831 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8832 if (dump_enabled_p ())
8833 dump_printf_loc (MSG_NOTE, vect_location,
8834 "hoisting out of the vectorized loop: %G", stmt);
8835 scalar_dest = copy_ssa_name (scalar_dest);
8836 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8837 gsi_insert_on_edge_immediate
8838 (loop_preheader_edge (loop),
8839 gimple_build_assign (scalar_dest, rhs));
8841 /* These copies are all equivalent, but currently the representation
8842 requires a separate STMT_VINFO_VEC_STMT for each one. */
8843 gimple_stmt_iterator gsi2 = *gsi;
8844 gsi_next (&gsi2);
8845 for (j = 0; j < ncopies; j++)
8847 if (hoist_p)
8848 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8849 vectype, NULL);
8850 else
8851 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8852 vectype, &gsi2);
8853 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8854 if (slp)
8855 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8856 else
8858 if (j == 0)
8859 *vec_stmt = new_stmt;
8860 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8863 return true;
8866 if (memory_access_type == VMAT_ELEMENTWISE
8867 || memory_access_type == VMAT_STRIDED_SLP)
8869 gimple_stmt_iterator incr_gsi;
8870 bool insert_after;
8871 tree offvar;
8872 tree ivstep;
8873 tree running_off;
8874 vec<constructor_elt, va_gc> *v = NULL;
8875 tree stride_base, stride_step, alias_off;
8876 /* Checked by get_load_store_type. */
8877 unsigned int const_nunits = nunits.to_constant ();
8878 unsigned HOST_WIDE_INT cst_offset = 0;
8879 tree dr_offset;
8881 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8882 gcc_assert (!nested_in_vect_loop);
8884 if (grouped_load)
8886 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8887 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8889 else
8891 first_stmt_info = stmt_info;
8892 first_dr_info = dr_info;
8894 if (slp && grouped_load)
8896 group_size = DR_GROUP_SIZE (first_stmt_info);
8897 ref_type = get_group_alias_ptr_type (first_stmt_info);
8899 else
8901 if (grouped_load)
8902 cst_offset
8903 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8904 * vect_get_place_in_interleaving_chain (stmt_info,
8905 first_stmt_info));
8906 group_size = 1;
8907 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8910 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8911 stride_base
8912 = fold_build_pointer_plus
8913 (DR_BASE_ADDRESS (first_dr_info->dr),
8914 size_binop (PLUS_EXPR,
8915 convert_to_ptrofftype (dr_offset),
8916 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8917 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8919 /* For a load with loop-invariant (but other than power-of-2)
8920 stride (i.e. not a grouped access) like so:
8922 for (i = 0; i < n; i += stride)
8923 ... = array[i];
8925 we generate a new induction variable and new accesses to
8926 form a new vector (or vectors, depending on ncopies):
8928 for (j = 0; ; j += VF*stride)
8929 tmp1 = array[j];
 8930 tmp2 = array[j + stride];
 8931 ...
 8932 vectemp = {tmp1, tmp2, ...}
8935 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8936 build_int_cst (TREE_TYPE (stride_step), vf));
8938 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8940 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8941 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8942 create_iv (stride_base, ivstep, NULL,
8943 loop, &incr_gsi, insert_after,
8944 &offvar, NULL);
8946 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8948 running_off = offvar;
8949 alias_off = build_int_cst (ref_type, 0);
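/* NLOADS is the number of scalar or piece loads needed to fill one
   vector, LNEL the number of group elements each such load covers,
   LTYPE the type being loaded and LVECTYPE the vector type assembled
   from the pieces (which may differ from VECTYPE when we load integer
   chunks and view-convert the result).  */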
8950 int nloads = const_nunits;
8951 int lnel = 1;
8952 tree ltype = TREE_TYPE (vectype);
8953 tree lvectype = vectype;
8954 auto_vec<tree> dr_chain;
8955 if (memory_access_type == VMAT_STRIDED_SLP)
8957 if (group_size < const_nunits)
8959 /* First check if vec_init optab supports construction from vector
8960 elts directly. Otherwise avoid emitting a constructor of
8961 vector elements by performing the loads using an integer type
8962 of the same size, constructing a vector of those and then
8963 re-interpreting it as the original vector type. This avoids a
8964 huge runtime penalty due to the general inability to perform
8965 store forwarding from smaller stores to a larger load. */
8966 tree ptype;
8967 tree vtype
8968 = vector_vector_composition_type (vectype,
8969 const_nunits / group_size,
8970 &ptype);
8971 if (vtype != NULL_TREE)
8973 nloads = const_nunits / group_size;
8974 lnel = group_size;
8975 lvectype = vtype;
8976 ltype = ptype;
8979 else
8981 nloads = 1;
8982 lnel = const_nunits;
8983 ltype = vectype;
8985 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
 8987 /* Load the whole vector(1) scalar_type at once when the vectype has just one element. */
8988 else if (nloads == 1)
8989 ltype = vectype;
8991 if (slp)
8993 /* For SLP permutation support we need to load the whole group,
8994 not only the number of vector stmts the permutation result
8995 fits in. */
8996 if (slp_perm)
8998 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8999 variable VF. */
9000 unsigned int const_vf = vf.to_constant ();
9001 ncopies = CEIL (group_size * const_vf, const_nunits);
9002 dr_chain.create (ncopies);
9004 else
9005 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9007 unsigned int group_el = 0;
9008 unsigned HOST_WIDE_INT
9009 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
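/* For each copy emit NLOADS loads at RUNNING_OFF plus the element
   offset within the group, assemble them with a CONSTRUCTOR when more
   than one load is needed, and bump the strided IV by STRIDE_STEP
   whenever a full group has been read.  */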
9010 for (j = 0; j < ncopies; j++)
9012 if (nloads > 1)
9013 vec_alloc (v, nloads);
9014 gimple *new_stmt = NULL;
9015 for (i = 0; i < nloads; i++)
9017 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9018 group_el * elsz + cst_offset);
9019 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9020 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9021 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9022 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9023 if (nloads > 1)
9024 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9025 gimple_assign_lhs (new_stmt));
9027 group_el += lnel;
9028 if (! slp
9029 || group_el == group_size)
9031 tree newoff = copy_ssa_name (running_off);
9032 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9033 running_off, stride_step);
9034 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9036 running_off = newoff;
9037 group_el = 0;
9040 if (nloads > 1)
9042 tree vec_inv = build_constructor (lvectype, v);
9043 new_temp = vect_init_vector (vinfo, stmt_info,
9044 vec_inv, lvectype, gsi);
9045 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9046 if (lvectype != vectype)
9048 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9049 VIEW_CONVERT_EXPR,
9050 build1 (VIEW_CONVERT_EXPR,
9051 vectype, new_temp));
9052 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9056 if (slp)
9058 if (slp_perm)
9059 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9060 else
9061 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9063 else
9065 if (j == 0)
9066 *vec_stmt = new_stmt;
9067 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9070 if (slp_perm)
9072 unsigned n_perms;
9073 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9074 false, &n_perms);
9076 return true;
9079 if (memory_access_type == VMAT_GATHER_SCATTER
9080 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9081 grouped_load = false;
9083 if (grouped_load)
9085 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9086 group_size = DR_GROUP_SIZE (first_stmt_info);
9087 /* For SLP vectorization we directly vectorize a subchain
9088 without permutation. */
9089 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9090 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9091 /* For BB vectorization always use the first stmt to base
9092 the data ref pointer on. */
9093 if (bb_vinfo)
9094 first_stmt_info_for_drptr
9095 = vect_find_first_scalar_stmt_in_slp (slp_node);
9097 /* Check if the chain of loads is already vectorized. */
9098 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9099 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9100 ??? But we can only do so if there is exactly one
9101 as we have no way to get at the rest. Leave the CSE
9102 opportunity alone.
9103 ??? With the group load eventually participating
9104 in multiple different permutations (having multiple
9105 slp nodes which refer to the same group) the CSE
9106 is even wrong code. See PR56270. */
9107 && !slp)
9109 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9110 return true;
9112 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9113 group_gap_adj = 0;
9115 /* VEC_NUM is the number of vect stmts to be created for this group. */
9116 if (slp)
9118 grouped_load = false;
9119 /* If an SLP permutation is from N elements to N elements,
9120 and if one vector holds a whole number of N, we can load
9121 the inputs to the permutation in the same way as an
9122 unpermuted sequence. In other cases we need to load the
9123 whole group, not only the number of vector stmts the
9124 permutation result fits in. */
9125 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9126 if (slp_perm
9127 && (group_size != scalar_lanes
9128 || !multiple_p (nunits, group_size)))
9130 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9131 variable VF; see vect_transform_slp_perm_load. */
9132 unsigned int const_vf = vf.to_constant ();
9133 unsigned int const_nunits = nunits.to_constant ();
9134 vec_num = CEIL (group_size * const_vf, const_nunits);
9135 group_gap_adj = vf * group_size - nunits * vec_num;
9137 else
9139 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9140 group_gap_adj
9141 = group_size - scalar_lanes;
9144 else
9145 vec_num = group_size;
9147 ref_type = get_group_alias_ptr_type (first_stmt_info);
9149 else
9151 first_stmt_info = stmt_info;
9152 first_dr_info = dr_info;
9153 group_size = vec_num = 1;
9154 group_gap_adj = 0;
9155 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9158 gcc_assert (alignment_support_scheme);
9159 vec_loop_masks *loop_masks
9160 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9161 ? &LOOP_VINFO_MASKS (loop_vinfo)
9162 : NULL);
9163 vec_loop_lens *loop_lens
9164 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9165 ? &LOOP_VINFO_LENS (loop_vinfo)
9166 : NULL);
9168 /* Shouldn't go with length-based approach if fully masked. */
9169 gcc_assert (!loop_lens || !loop_masks);
 9171 /* Targets with load-lane instructions must not require explicit
9172 realignment. vect_supportable_dr_alignment always returns either
9173 dr_aligned or dr_unaligned_supported for masked operations. */
9174 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9175 && !mask
9176 && !loop_masks)
9177 || alignment_support_scheme == dr_aligned
9178 || alignment_support_scheme == dr_unaligned_supported);
9180 /* In case the vectorization factor (VF) is bigger than the number
9181 of elements that we can fit in a vectype (nunits), we have to generate
 9182 more than one vector stmt - i.e. - we need to "unroll" the
9183 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9184 from one copy of the vector stmt to the next, in the field
9185 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9186 stages to find the correct vector defs to be used when vectorizing
9187 stmts that use the defs of the current stmt. The example below
9188 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9189 need to create 4 vectorized stmts):
9191 before vectorization:
9192 RELATED_STMT VEC_STMT
9193 S1: x = memref - -
9194 S2: z = x + 1 - -
9196 step 1: vectorize stmt S1:
9197 We first create the vector stmt VS1_0, and, as usual, record a
9198 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9199 Next, we create the vector stmt VS1_1, and record a pointer to
9200 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9201 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9202 stmts and pointers:
9203 RELATED_STMT VEC_STMT
9204 VS1_0: vx0 = memref0 VS1_1 -
9205 VS1_1: vx1 = memref1 VS1_2 -
9206 VS1_2: vx2 = memref2 VS1_3 -
9207 VS1_3: vx3 = memref3 - -
9208 S1: x = load - VS1_0
9209 S2: z = x + 1 - -
9212 /* In case of interleaving (non-unit grouped access):
9214 S1: x2 = &base + 2
9215 S2: x0 = &base
9216 S3: x1 = &base + 1
9217 S4: x3 = &base + 3
9219 Vectorized loads are created in the order of memory accesses
9220 starting from the access of the first stmt of the chain:
9222 VS1: vx0 = &base
9223 VS2: vx1 = &base + vec_size*1
9224 VS3: vx3 = &base + vec_size*2
9225 VS4: vx4 = &base + vec_size*3
9227 Then permutation statements are generated:
9229 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9230 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9233 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9234 (the order of the data-refs in the output of vect_permute_load_chain
9235 corresponds to the order of scalar stmts in the interleaving chain - see
9236 the documentation of vect_permute_load_chain()).
9237 The generation of permutation stmts and recording them in
9238 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9240 In case of both multiple types and interleaving, the vector loads and
9241 permutation stmts above are created for every copy. The result vector
9242 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9243 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9245 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9246 on a target that supports unaligned accesses (dr_unaligned_supported)
9247 we generate the following code:
9248 p = initial_addr;
9249 indx = 0;
9250 loop {
9251 p = p + indx * vectype_size;
9252 vec_dest = *(p);
 9252 indx = indx + 1;
 9254 }
9256 Otherwise, the data reference is potentially unaligned on a target that
9257 does not support unaligned accesses (dr_explicit_realign_optimized) -
9258 then generate the following code, in which the data in each iteration is
9259 obtained by two vector loads, one from the previous iteration, and one
9260 from the current iteration:
9261 p1 = initial_addr;
9262 msq_init = *(floor(p1))
9263 p2 = initial_addr + VS - 1;
9264 realignment_token = call target_builtin;
9265 indx = 0;
9266 loop {
9267 p2 = p2 + indx * vectype_size
9268 lsq = *(floor(p2))
9269 vec_dest = realign_load (msq, lsq, realignment_token)
9270 indx = indx + 1;
9271 msq = lsq;
9272 } */
9274 /* If the misalignment remains the same throughout the execution of the
9275 loop, we can create the init_addr and permutation mask at the loop
9276 preheader. Otherwise, it needs to be created inside the loop.
9277 This can only occur when vectorizing memory accesses in the inner-loop
9278 nested within an outer-loop that is being vectorized. */
9280 if (nested_in_vect_loop
9281 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9282 GET_MODE_SIZE (TYPE_MODE (vectype))))
9284 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9285 compute_in_loop = true;
9288 bool diff_first_stmt_info
9289 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9291 if ((alignment_support_scheme == dr_explicit_realign_optimized
9292 || alignment_support_scheme == dr_explicit_realign)
9293 && !compute_in_loop)
 9295 /* If we have a different first_stmt_info, we can't set up realignment
 9296 here, since we can't guarantee that the first_stmt_info DR has been
 9297 initialized yet; instead use the first_stmt_info_for_drptr DR and bump
 9298 it by the distance from the first_stmt_info DR, as below. */
9299 if (!diff_first_stmt_info)
9300 msq = vect_setup_realignment (vinfo,
9301 first_stmt_info, gsi, &realignment_token,
9302 alignment_support_scheme, NULL_TREE,
9303 &at_loop);
9304 if (alignment_support_scheme == dr_explicit_realign_optimized)
9306 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9307 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9308 size_one_node);
9309 gcc_assert (!first_stmt_info_for_drptr);
9312 else
9313 at_loop = loop;
9315 tree offset = NULL_TREE;
9316 if (!known_eq (poffset, 0))
9317 offset = size_int (poffset);
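/* For a contiguous reverse access the data-ref step is negative, so
   start the vector (nunits - 1) elements before the scalar address and
   reverse the loaded vector with a permute afterwards.  */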
9318 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9319 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9321 tree bump;
9322 tree vec_offset = NULL_TREE;
9323 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9325 aggr_type = NULL_TREE;
9326 bump = NULL_TREE;
9328 else if (memory_access_type == VMAT_GATHER_SCATTER)
9330 aggr_type = elem_type;
9331 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9332 &bump, &vec_offset);
9334 else
9336 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9337 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9338 else
9339 aggr_type = vectype;
9340 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9341 memory_access_type);
9344 vec<tree> vec_offsets = vNULL;
9345 auto_vec<tree> vec_masks;
9346 if (mask)
9347 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9348 mask, &vec_masks, mask_vectype, NULL_TREE);
9349 tree vec_mask = NULL_TREE;
9350 poly_uint64 group_elt = 0;
9351 for (j = 0; j < ncopies; j++)
9353 /* 1. Create the vector or array pointer update chain. */
9354 if (j == 0)
9356 bool simd_lane_access_p
9357 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
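/* A SIMD lane access can address the DR base directly (plus a constant
   offset) instead of creating a data-ref pointer IV, provided the base
   is a plain variable at offset zero, the alias sets agree and the
   access is aligned or supported unaligned.  */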
9358 if (simd_lane_access_p
9359 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9360 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9361 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9362 && integer_zerop (DR_INIT (first_dr_info->dr))
9363 && alias_sets_conflict_p (get_alias_set (aggr_type),
9364 get_alias_set (TREE_TYPE (ref_type)))
9365 && (alignment_support_scheme == dr_aligned
9366 || alignment_support_scheme == dr_unaligned_supported))
9368 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9369 dataref_offset = build_int_cst (ref_type, 0);
9371 else if (diff_first_stmt_info)
9373 dataref_ptr
9374 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9375 aggr_type, at_loop, offset, &dummy,
9376 gsi, &ptr_incr, simd_lane_access_p,
9377 byte_offset, bump);
9378 /* Adjust the pointer by the difference to first_stmt. */
9379 data_reference_p ptrdr
9380 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9381 tree diff
9382 = fold_convert (sizetype,
9383 size_binop (MINUS_EXPR,
9384 DR_INIT (first_dr_info->dr),
9385 DR_INIT (ptrdr)));
9386 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9387 stmt_info, diff);
9388 if (alignment_support_scheme == dr_explicit_realign)
9390 msq = vect_setup_realignment (vinfo,
9391 first_stmt_info_for_drptr, gsi,
9392 &realignment_token,
9393 alignment_support_scheme,
9394 dataref_ptr, &at_loop);
9395 gcc_assert (!compute_in_loop);
9398 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9400 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9401 &gs_info, &dataref_ptr,
9402 &vec_offsets);
9404 else
9405 dataref_ptr
9406 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9407 at_loop,
9408 offset, &dummy, gsi, &ptr_incr,
9409 simd_lane_access_p,
9410 byte_offset, bump);
9411 if (mask)
9412 vec_mask = vec_masks[0];
9414 else
9416 if (dataref_offset)
9417 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9418 bump);
9419 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9420 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9421 stmt_info, bump);
9422 if (mask)
9423 vec_mask = vec_masks[j];
9426 if (grouped_load || slp_perm)
9427 dr_chain.create (vec_num);
9429 gimple *new_stmt = NULL;
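/* For load-lanes the whole group is loaded by a single (possibly
   masked) internal-function call into an array of vectors, from which
   the individual vectors are then extracted.  */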
9430 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9432 tree vec_array;
9434 vec_array = create_vector_array (vectype, vec_num);
9436 tree final_mask = NULL_TREE;
9437 if (loop_masks)
9438 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9439 vectype, j);
9440 if (vec_mask)
9441 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9442 vec_mask, gsi);
9444 gcall *call;
9445 if (final_mask)
9447 /* Emit:
9448 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9449 VEC_MASK). */
9450 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9451 tree alias_ptr = build_int_cst (ref_type, align);
9452 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9453 dataref_ptr, alias_ptr,
9454 final_mask);
9456 else
9458 /* Emit:
9459 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9460 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9461 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9463 gimple_call_set_lhs (call, vec_array);
9464 gimple_call_set_nothrow (call, true);
9465 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9466 new_stmt = call;
9468 /* Extract each vector into an SSA_NAME. */
9469 for (i = 0; i < vec_num; i++)
9471 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9472 vec_array, i);
9473 dr_chain.quick_push (new_temp);
9476 /* Record the mapping between SSA_NAMEs and statements. */
9477 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9479 /* Record that VEC_ARRAY is now dead. */
9480 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9482 else
9484 for (i = 0; i < vec_num; i++)
9486 tree final_mask = NULL_TREE;
9487 if (loop_masks
9488 && memory_access_type != VMAT_INVARIANT)
9489 final_mask = vect_get_loop_mask (gsi, loop_masks,
9490 vec_num * ncopies,
9491 vectype, vec_num * j + i);
9492 if (vec_mask)
9493 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9494 vec_mask, gsi);
9496 if (i > 0)
9497 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9498 gsi, stmt_info, bump);
9500 /* 2. Create the vector-load in the loop. */
9501 switch (alignment_support_scheme)
9503 case dr_aligned:
9504 case dr_unaligned_supported:
9506 unsigned int misalign;
9507 unsigned HOST_WIDE_INT align;
9509 if (memory_access_type == VMAT_GATHER_SCATTER
9510 && gs_info.ifn != IFN_LAST)
9512 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9513 vec_offset = vec_offsets[j];
9514 tree zero = build_zero_cst (vectype);
9515 tree scale = size_int (gs_info.scale);
9516 gcall *call;
9517 if (final_mask)
9518 call = gimple_build_call_internal
9519 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9520 vec_offset, scale, zero, final_mask);
9521 else
9522 call = gimple_build_call_internal
9523 (IFN_GATHER_LOAD, 4, dataref_ptr,
9524 vec_offset, scale, zero);
9525 gimple_call_set_nothrow (call, true);
9526 new_stmt = call;
9527 data_ref = NULL_TREE;
9528 break;
9530 else if (memory_access_type == VMAT_GATHER_SCATTER)
9532 /* Emulated gather-scatter. */
9533 gcc_assert (!final_mask);
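/* Open-code the gather: extract each offset element, scale it, add it
   to the base pointer and emit one scalar load per lane, collecting
   the results into a CONSTRUCTOR of VECTYPE.  */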
9534 unsigned HOST_WIDE_INT const_nunits
9535 = nunits.to_constant ();
9536 unsigned HOST_WIDE_INT const_offset_nunits
9537 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9538 .to_constant ();
9539 vec<constructor_elt, va_gc> *ctor_elts;
9540 vec_alloc (ctor_elts, const_nunits);
9541 gimple_seq stmts = NULL;
9542 /* We support offset vectors with more elements
9543 than the data vector for now. */
9544 unsigned HOST_WIDE_INT factor
9545 = const_offset_nunits / const_nunits;
9546 vec_offset = vec_offsets[j / factor];
9547 unsigned elt_offset = (j % factor) * const_nunits;
9548 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9549 tree scale = size_int (gs_info.scale);
9550 align
9551 = get_object_alignment (DR_REF (first_dr_info->dr));
9552 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9553 align);
9554 for (unsigned k = 0; k < const_nunits; ++k)
9556 tree boff = size_binop (MULT_EXPR,
9557 TYPE_SIZE (idx_type),
9558 bitsize_int
9559 (k + elt_offset));
9560 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9561 idx_type, vec_offset,
9562 TYPE_SIZE (idx_type),
9563 boff);
9564 idx = gimple_convert (&stmts, sizetype, idx);
9565 idx = gimple_build (&stmts, MULT_EXPR,
9566 sizetype, idx, scale);
9567 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9568 TREE_TYPE (dataref_ptr),
9569 dataref_ptr, idx);
9570 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9571 tree elt = make_ssa_name (TREE_TYPE (vectype));
9572 tree ref = build2 (MEM_REF, ltype, ptr,
9573 build_int_cst (ref_type, 0));
9574 new_stmt = gimple_build_assign (elt, ref);
9575 gimple_seq_add_stmt (&stmts, new_stmt);
9576 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9578 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9579 new_stmt = gimple_build_assign (NULL_TREE,
9580 build_constructor
9581 (vectype, ctor_elts));
9582 data_ref = NULL_TREE;
9583 break;
9586 align =
9587 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9588 if (alignment_support_scheme == dr_aligned)
9590 gcc_assert (aligned_access_p (first_dr_info, vectype));
9591 misalign = 0;
9593 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9595 align = dr_alignment
9596 (vect_dr_behavior (vinfo, first_dr_info));
9597 misalign = 0;
9599 else
9600 misalign = misalignment;
9601 if (dataref_offset == NULL_TREE
9602 && TREE_CODE (dataref_ptr) == SSA_NAME)
9603 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9604 align, misalign);
9605 align = least_bit_hwi (misalign | align);
9607 if (final_mask)
9609 tree ptr = build_int_cst (ref_type,
9610 align * BITS_PER_UNIT);
9611 gcall *call
9612 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9613 dataref_ptr, ptr,
9614 final_mask);
9615 gimple_call_set_nothrow (call, true);
9616 new_stmt = call;
9617 data_ref = NULL_TREE;
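/* With length-based partial vectors emit IFN_LEN_LOAD with the active
   length for this copy; the target may require the load to be
   performed in a byte (VnQI) mode, in which case the result is
   view-converted back to VECTYPE below.  */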
9619 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9621 tree final_len
9622 = vect_get_loop_len (loop_vinfo, loop_lens,
9623 vec_num * ncopies,
9624 vec_num * j + i);
9625 tree ptr = build_int_cst (ref_type,
9626 align * BITS_PER_UNIT);
9627 gcall *call
9628 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9629 dataref_ptr, ptr,
9630 final_len);
9631 gimple_call_set_nothrow (call, true);
9632 new_stmt = call;
9633 data_ref = NULL_TREE;
9635 /* Need conversion if it's wrapped with VnQI. */
9636 machine_mode vmode = TYPE_MODE (vectype);
9637 opt_machine_mode new_ovmode
9638 = get_len_load_store_mode (vmode, true);
9639 machine_mode new_vmode = new_ovmode.require ();
9640 if (vmode != new_vmode)
9642 tree qi_type = unsigned_intQI_type_node;
9643 tree new_vtype
9644 = build_vector_type_for_mode (qi_type, new_vmode);
9645 tree var = vect_get_new_ssa_name (new_vtype,
9646 vect_simple_var);
9647 gimple_set_lhs (call, var);
9648 vect_finish_stmt_generation (vinfo, stmt_info, call,
9649 gsi);
9650 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9651 new_stmt
9652 = gimple_build_assign (vec_dest,
9653 VIEW_CONVERT_EXPR, op);
9656 else
9658 tree ltype = vectype;
9659 tree new_vtype = NULL_TREE;
9660 unsigned HOST_WIDE_INT gap
9661 = DR_GROUP_GAP (first_stmt_info);
9662 unsigned int vect_align
9663 = vect_known_alignment_in_bytes (first_dr_info,
9664 vectype);
9665 unsigned int scalar_dr_size
9666 = vect_get_scalar_dr_size (first_dr_info);
9667 /* If there's no peeling for gaps but we have a gap
9668 with slp loads then load the lower half of the
9669 vector only. See get_group_load_store_type for
9670 when we apply this optimization. */
9671 if (slp
9672 && loop_vinfo
9673 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9674 && gap != 0
9675 && known_eq (nunits, (group_size - gap) * 2)
9676 && known_eq (nunits, group_size)
9677 && gap >= (vect_align / scalar_dr_size))
9679 tree half_vtype;
9680 new_vtype
9681 = vector_vector_composition_type (vectype, 2,
9682 &half_vtype);
9683 if (new_vtype != NULL_TREE)
9684 ltype = half_vtype;
9686 tree offset
9687 = (dataref_offset ? dataref_offset
9688 : build_int_cst (ref_type, 0));
9689 if (ltype != vectype
9690 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9692 unsigned HOST_WIDE_INT gap_offset
9693 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9694 tree gapcst = build_int_cst (ref_type, gap_offset);
9695 offset = size_binop (PLUS_EXPR, offset, gapcst);
9697 data_ref
9698 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9699 if (alignment_support_scheme == dr_aligned)
9701 else
9702 TREE_TYPE (data_ref)
9703 = build_aligned_type (TREE_TYPE (data_ref),
9704 align * BITS_PER_UNIT);
9705 if (ltype != vectype)
9707 vect_copy_ref_info (data_ref,
9708 DR_REF (first_dr_info->dr));
9709 tree tem = make_ssa_name (ltype);
9710 new_stmt = gimple_build_assign (tem, data_ref);
9711 vect_finish_stmt_generation (vinfo, stmt_info,
9712 new_stmt, gsi);
9713 data_ref = NULL;
9714 vec<constructor_elt, va_gc> *v;
9715 vec_alloc (v, 2);
9716 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9718 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9719 build_zero_cst (ltype));
9720 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9722 else
9724 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9725 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9726 build_zero_cst (ltype));
9728 gcc_assert (new_vtype != NULL_TREE);
9729 if (new_vtype == vectype)
9730 new_stmt = gimple_build_assign (
9731 vec_dest, build_constructor (vectype, v));
9732 else
9734 tree new_vname = make_ssa_name (new_vtype);
9735 new_stmt = gimple_build_assign (
9736 new_vname, build_constructor (new_vtype, v));
9737 vect_finish_stmt_generation (vinfo, stmt_info,
9738 new_stmt, gsi);
9739 new_stmt = gimple_build_assign (
9740 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9741 new_vname));
9745 break;
9747 case dr_explicit_realign:
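/* Explicit realignment: load the two aligned vectors that span the
   unaligned address (the second one through a pointer bumped by
   VS - 1 elements and rounded down) and combine them later with
   REALIGN_LOAD_EXPR using the realignment token.  */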
9749 tree ptr, bump;
9751 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9753 if (compute_in_loop)
9754 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9755 &realignment_token,
9756 dr_explicit_realign,
9757 dataref_ptr, NULL);
9759 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9760 ptr = copy_ssa_name (dataref_ptr);
9761 else
9762 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9763 // For explicit realign the target alignment should be
9764 // known at compile time.
9765 unsigned HOST_WIDE_INT align =
9766 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9767 new_stmt = gimple_build_assign
9768 (ptr, BIT_AND_EXPR, dataref_ptr,
9769 build_int_cst
9770 (TREE_TYPE (dataref_ptr),
9771 -(HOST_WIDE_INT) align));
9772 vect_finish_stmt_generation (vinfo, stmt_info,
9773 new_stmt, gsi);
9774 data_ref
9775 = build2 (MEM_REF, vectype, ptr,
9776 build_int_cst (ref_type, 0));
9777 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9778 vec_dest = vect_create_destination_var (scalar_dest,
9779 vectype);
9780 new_stmt = gimple_build_assign (vec_dest, data_ref);
9781 new_temp = make_ssa_name (vec_dest, new_stmt);
9782 gimple_assign_set_lhs (new_stmt, new_temp);
9783 gimple_move_vops (new_stmt, stmt_info->stmt);
9784 vect_finish_stmt_generation (vinfo, stmt_info,
9785 new_stmt, gsi);
9786 msq = new_temp;
9788 bump = size_binop (MULT_EXPR, vs,
9789 TYPE_SIZE_UNIT (elem_type));
9790 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9791 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9792 stmt_info, bump);
9793 new_stmt = gimple_build_assign
9794 (NULL_TREE, BIT_AND_EXPR, ptr,
9795 build_int_cst
9796 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9797 ptr = copy_ssa_name (ptr, new_stmt);
9798 gimple_assign_set_lhs (new_stmt, ptr);
9799 vect_finish_stmt_generation (vinfo, stmt_info,
9800 new_stmt, gsi);
9801 data_ref
9802 = build2 (MEM_REF, vectype, ptr,
9803 build_int_cst (ref_type, 0));
9804 break;
9806 case dr_explicit_realign_optimized:
9808 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9809 new_temp = copy_ssa_name (dataref_ptr);
9810 else
9811 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9812 // We should only be doing this if we know the target
9813 // alignment at compile time.
9814 unsigned HOST_WIDE_INT align =
9815 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9816 new_stmt = gimple_build_assign
9817 (new_temp, BIT_AND_EXPR, dataref_ptr,
9818 build_int_cst (TREE_TYPE (dataref_ptr),
9819 -(HOST_WIDE_INT) align));
9820 vect_finish_stmt_generation (vinfo, stmt_info,
9821 new_stmt, gsi);
9822 data_ref
9823 = build2 (MEM_REF, vectype, new_temp,
9824 build_int_cst (ref_type, 0));
9825 break;
9827 default:
9828 gcc_unreachable ();
9830 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9831 /* DATA_REF is null if we've already built the statement. */
9832 if (data_ref)
9834 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9835 new_stmt = gimple_build_assign (vec_dest, data_ref);
9837 new_temp = make_ssa_name (vec_dest, new_stmt);
9838 gimple_set_lhs (new_stmt, new_temp);
9839 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9841 /* 3. Handle explicit realignment if necessary/supported.
9842 Create in loop:
9843 vec_dest = realign_load (msq, lsq, realignment_token) */
9844 if (alignment_support_scheme == dr_explicit_realign_optimized
9845 || alignment_support_scheme == dr_explicit_realign)
9847 lsq = gimple_assign_lhs (new_stmt);
9848 if (!realignment_token)
9849 realignment_token = dataref_ptr;
9850 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9851 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9852 msq, lsq, realignment_token);
9853 new_temp = make_ssa_name (vec_dest, new_stmt);
9854 gimple_assign_set_lhs (new_stmt, new_temp);
9855 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9857 if (alignment_support_scheme == dr_explicit_realign_optimized)
9859 gcc_assert (phi);
9860 if (i == vec_num - 1 && j == ncopies - 1)
9861 add_phi_arg (phi, lsq,
9862 loop_latch_edge (containing_loop),
9863 UNKNOWN_LOCATION);
9864 msq = lsq;
9868 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9870 tree perm_mask = perm_mask_for_reverse (vectype);
9871 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9872 perm_mask, stmt_info, gsi);
9873 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9876 /* Collect vector loads and later create their permutation in
9877 vect_transform_grouped_load (). */
9878 if (grouped_load || slp_perm)
9879 dr_chain.quick_push (new_temp);
9881 /* Store vector loads in the corresponding SLP_NODE. */
9882 if (slp && !slp_perm)
9883 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
 9885 /* With SLP permutation we load the gaps as well; without
 9886 it we need to skip the gaps after we manage to fully load
 9887 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9888 group_elt += nunits;
9889 if (maybe_ne (group_gap_adj, 0U)
9890 && !slp_perm
9891 && known_eq (group_elt, group_size - group_gap_adj))
9893 poly_wide_int bump_val
9894 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9895 * group_gap_adj);
9896 if (tree_int_cst_sgn
9897 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9898 bump_val = -bump_val;
9899 tree bump = wide_int_to_tree (sizetype, bump_val);
9900 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9901 gsi, stmt_info, bump);
9902 group_elt = 0;
9905 /* Bump the vector pointer to account for a gap or for excess
9906 elements loaded for a permuted SLP load. */
9907 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9909 poly_wide_int bump_val
9910 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9911 * group_gap_adj);
9912 if (tree_int_cst_sgn
9913 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9914 bump_val = -bump_val;
9915 tree bump = wide_int_to_tree (sizetype, bump_val);
9916 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9917 stmt_info, bump);
9921 if (slp && !slp_perm)
9922 continue;
9924 if (slp_perm)
9926 unsigned n_perms;
9927 /* For SLP we know we've seen all possible uses of dr_chain so
9928 direct vect_transform_slp_perm_load to DCE the unused parts.
9929 ??? This is a hack to prevent compile-time issues as seen
9930 in PR101120 and friends. */
9931 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9932 gsi, vf, false, &n_perms,
9933 nullptr, true);
9934 gcc_assert (ok);
9936 else
9938 if (grouped_load)
9940 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9941 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9942 group_size, gsi);
9943 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9945 else
9947 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9950 dr_chain.release ();
9952 if (!slp)
9953 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9955 return true;
9958 /* Function vect_is_simple_cond.
9960 Input:
9961 LOOP - the loop that is being vectorized.
9962 COND - Condition that is checked for simple use.
9964 Output:
9965 *COMP_VECTYPE - the vector type for the comparison.
9966 *DTS - The def types for the arguments of the comparison
9968 Returns whether a COND can be vectorized. Checks whether
 9969 condition operands are supportable using vect_is_simple_use. */
9971 static bool
9972 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9973 slp_tree slp_node, tree *comp_vectype,
9974 enum vect_def_type *dts, tree vectype)
9976 tree lhs, rhs;
9977 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9978 slp_tree slp_op;
9980 /* Mask case. */
9981 if (TREE_CODE (cond) == SSA_NAME
9982 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9984 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9985 &slp_op, &dts[0], comp_vectype)
9986 || !*comp_vectype
9987 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9988 return false;
9989 return true;
9992 if (!COMPARISON_CLASS_P (cond))
9993 return false;
9995 lhs = TREE_OPERAND (cond, 0);
9996 rhs = TREE_OPERAND (cond, 1);
9998 if (TREE_CODE (lhs) == SSA_NAME)
10000 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10001 &lhs, &slp_op, &dts[0], &vectype1))
10002 return false;
10004 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10005 || TREE_CODE (lhs) == FIXED_CST)
10006 dts[0] = vect_constant_def;
10007 else
10008 return false;
10010 if (TREE_CODE (rhs) == SSA_NAME)
10012 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10013 &rhs, &slp_op, &dts[1], &vectype2))
10014 return false;
10016 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10017 || TREE_CODE (rhs) == FIXED_CST)
10018 dts[1] = vect_constant_def;
10019 else
10020 return false;
10022 if (vectype1 && vectype2
10023 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10024 TYPE_VECTOR_SUBPARTS (vectype2)))
10025 return false;
10027 *comp_vectype = vectype1 ? vectype1 : vectype2;
10028 /* Invariant comparison. */
10029 if (! *comp_vectype)
10031 tree scalar_type = TREE_TYPE (lhs);
10032 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10033 *comp_vectype = truth_type_for (vectype);
10034 else
10036 /* If we can widen the comparison to match vectype do so. */
10037 if (INTEGRAL_TYPE_P (scalar_type)
10038 && !slp_node
10039 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10040 TYPE_SIZE (TREE_TYPE (vectype))))
10041 scalar_type = build_nonstandard_integer_type
10042 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10043 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10044 slp_node);
10048 return true;
10051 /* vectorizable_condition.
 10053 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10054 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10055 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10056 at GSI.
10058 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10060 Return true if STMT_INFO is vectorizable in this way. */
10062 static bool
10063 vectorizable_condition (vec_info *vinfo,
10064 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10065 gimple **vec_stmt,
10066 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10068 tree scalar_dest = NULL_TREE;
10069 tree vec_dest = NULL_TREE;
10070 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10071 tree then_clause, else_clause;
10072 tree comp_vectype = NULL_TREE;
10073 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10074 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10075 tree vec_compare;
10076 tree new_temp;
10077 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10078 enum vect_def_type dts[4]
10079 = {vect_unknown_def_type, vect_unknown_def_type,
10080 vect_unknown_def_type, vect_unknown_def_type};
10081 int ndts = 4;
10082 int ncopies;
10083 int vec_num;
10084 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10085 int i;
10086 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10087 vec<tree> vec_oprnds0 = vNULL;
10088 vec<tree> vec_oprnds1 = vNULL;
10089 vec<tree> vec_oprnds2 = vNULL;
10090 vec<tree> vec_oprnds3 = vNULL;
10091 tree vec_cmp_type;
10092 bool masked = false;
10094 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10095 return false;
10097 /* Is vectorizable conditional operation? */
10098 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10099 if (!stmt)
10100 return false;
10102 code = gimple_assign_rhs_code (stmt);
10103 if (code != COND_EXPR)
10104 return false;
10106 stmt_vec_info reduc_info = NULL;
10107 int reduc_index = -1;
10108 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10109 bool for_reduction
10110 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10111 if (for_reduction)
10113 if (STMT_SLP_TYPE (stmt_info))
10114 return false;
10115 reduc_info = info_for_reduction (vinfo, stmt_info);
10116 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10117 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10118 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10119 || reduc_index != -1);
10121 else
10123 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10124 return false;
10127 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10128 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10130 if (slp_node)
10132 ncopies = 1;
10133 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10135 else
10137 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10138 vec_num = 1;
10141 gcc_assert (ncopies >= 1);
10142 if (for_reduction && ncopies > 1)
10143 return false; /* FORNOW */
10145 cond_expr = gimple_assign_rhs1 (stmt);
10147 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10148 &comp_vectype, &dts[0], vectype)
10149 || !comp_vectype)
10150 return false;
10152 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10153 slp_tree then_slp_node, else_slp_node;
10154 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10155 &then_clause, &then_slp_node, &dts[2], &vectype1))
10156 return false;
10157 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10158 &else_clause, &else_slp_node, &dts[3], &vectype2))
10159 return false;
10161 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10162 return false;
10164 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10165 return false;
10167 masked = !COMPARISON_CLASS_P (cond_expr);
10168 vec_cmp_type = truth_type_for (comp_vectype);
10170 if (vec_cmp_type == NULL_TREE)
10171 return false;
10173 cond_code = TREE_CODE (cond_expr);
10174 if (!masked)
10176 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10177 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10180 /* For conditional reductions, the "then" value needs to be the candidate
10181 value calculated by this iteration while the "else" value needs to be
10182 the result carried over from previous iterations. If the COND_EXPR
10183 is the other way around, we need to swap it. */
10184 bool must_invert_cmp_result = false;
10185 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10187 if (masked)
10188 must_invert_cmp_result = true;
10189 else
10191 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10192 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10193 if (new_code == ERROR_MARK)
10194 must_invert_cmp_result = true;
10195 else
10197 cond_code = new_code;
10198 /* Make sure we don't accidentally use the old condition. */
10199 cond_expr = NULL_TREE;
10202 std::swap (then_clause, else_clause);
10205 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10207 /* Boolean values may have another representation in vectors
10208 and therefore we prefer bit operations over comparison for
10209 them (which also works for scalar masks). We store opcodes
10210 to use in bitop1 and bitop2. Statement is vectorized as
10211 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10212 depending on bitop1 and bitop2 arity. */
10213 switch (cond_code)
10215 case GT_EXPR:
10216 bitop1 = BIT_NOT_EXPR;
10217 bitop2 = BIT_AND_EXPR;
10218 break;
10219 case GE_EXPR:
10220 bitop1 = BIT_NOT_EXPR;
10221 bitop2 = BIT_IOR_EXPR;
10222 break;
10223 case LT_EXPR:
10224 bitop1 = BIT_NOT_EXPR;
10225 bitop2 = BIT_AND_EXPR;
10226 std::swap (cond_expr0, cond_expr1);
10227 break;
10228 case LE_EXPR:
10229 bitop1 = BIT_NOT_EXPR;
10230 bitop2 = BIT_IOR_EXPR;
10231 std::swap (cond_expr0, cond_expr1);
10232 break;
10233 case NE_EXPR:
10234 bitop1 = BIT_XOR_EXPR;
10235 break;
10236 case EQ_EXPR:
10237 bitop1 = BIT_XOR_EXPR;
10238 bitop2 = BIT_NOT_EXPR;
10239 break;
10240 default:
10241 return false;
10243 cond_code = SSA_NAME;
10246 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10247 && reduction_type == EXTRACT_LAST_REDUCTION
10248 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10250 if (dump_enabled_p ())
10251 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10252 "reduction comparison operation not supported.\n");
10253 return false;
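/* Analysis only: check that any required bit operations and the vector
   condition itself are supported, update invariant SLP operand types,
   decide whether partial vectors can be used for reductions, and
   record the cost.  */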
10256 if (!vec_stmt)
10258 if (bitop1 != NOP_EXPR)
10260 machine_mode mode = TYPE_MODE (comp_vectype);
10261 optab optab;
10263 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10264 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10265 return false;
10267 if (bitop2 != NOP_EXPR)
10269 optab = optab_for_tree_code (bitop2, comp_vectype,
10270 optab_default);
10271 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10272 return false;
10276 vect_cost_for_stmt kind = vector_stmt;
10277 if (reduction_type == EXTRACT_LAST_REDUCTION)
10278 /* Count one reduction-like operation per vector. */
10279 kind = vec_to_scalar;
10280 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10281 return false;
10283 if (slp_node
10284 && (!vect_maybe_update_slp_op_vectype
10285 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10286 || (op_adjust == 1
10287 && !vect_maybe_update_slp_op_vectype
10288 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10289 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10290 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10292 if (dump_enabled_p ())
10293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10294 "incompatible vector types for invariants\n");
10295 return false;
10298 if (loop_vinfo && for_reduction
10299 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10301 if (reduction_type == EXTRACT_LAST_REDUCTION)
10302 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10303 ncopies * vec_num, vectype, NULL);
10304 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10305 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10307 if (dump_enabled_p ())
10308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10309 "conditional reduction prevents the use"
10310 " of partial vectors.\n");
10311 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10315 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10316 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10317 cost_vec, kind);
10318 return true;
10321 /* Transform. */
10323 /* Handle def. */
10324 scalar_dest = gimple_assign_lhs (stmt);
10325 if (reduction_type != EXTRACT_LAST_REDUCTION)
10326 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10328 bool swap_cond_operands = false;
10330 /* See whether another part of the vectorized code applies a loop
10331 mask to the condition, or to its inverse. */
10333 vec_loop_masks *masks = NULL;
10334 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10336 if (reduction_type == EXTRACT_LAST_REDUCTION)
10337 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10338 else
10340 scalar_cond_masked_key cond (cond_expr, ncopies);
10341 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10342 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10343 else
10345 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10346 cond.code = invert_tree_comparison (cond.code, honor_nans);
10347 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10349 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10350 cond_code = cond.code;
10351 swap_cond_operands = true;
10357 /* Handle cond expr. */
10358 if (masked)
10359 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10360 cond_expr, &vec_oprnds0, comp_vectype,
10361 then_clause, &vec_oprnds2, vectype,
10362 reduction_type != EXTRACT_LAST_REDUCTION
10363 ? else_clause : NULL, &vec_oprnds3, vectype);
10364 else
10365 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10366 cond_expr0, &vec_oprnds0, comp_vectype,
10367 cond_expr1, &vec_oprnds1, comp_vectype,
10368 then_clause, &vec_oprnds2, vectype,
10369 reduction_type != EXTRACT_LAST_REDUCTION
10370 ? else_clause : NULL, &vec_oprnds3, vectype);
10372 /* Arguments are ready. Create the new vector stmt. */
10373 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10375 vec_then_clause = vec_oprnds2[i];
10376 if (reduction_type != EXTRACT_LAST_REDUCTION)
10377 vec_else_clause = vec_oprnds3[i];
10379 if (swap_cond_operands)
10380 std::swap (vec_then_clause, vec_else_clause);
10382 if (masked)
10383 vec_compare = vec_cond_lhs;
10384 else
10386 vec_cond_rhs = vec_oprnds1[i];
10387 if (bitop1 == NOP_EXPR)
10389 gimple_seq stmts = NULL;
10390 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10391 vec_cond_lhs, vec_cond_rhs);
10392 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10394 else
10396 new_temp = make_ssa_name (vec_cmp_type);
10397 gassign *new_stmt;
10398 if (bitop1 == BIT_NOT_EXPR)
10399 new_stmt = gimple_build_assign (new_temp, bitop1,
10400 vec_cond_rhs);
10401 else
10402 new_stmt
10403 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10404 vec_cond_rhs);
10405 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10406 if (bitop2 == NOP_EXPR)
10407 vec_compare = new_temp;
10408 else if (bitop2 == BIT_NOT_EXPR)
10410 /* Instead of doing ~x ? y : z do x ? z : y. */
10411 vec_compare = new_temp;
10412 std::swap (vec_then_clause, vec_else_clause);
10414 else
10416 vec_compare = make_ssa_name (vec_cmp_type);
10417 new_stmt
10418 = gimple_build_assign (vec_compare, bitop2,
10419 vec_cond_lhs, new_temp);
10420 vect_finish_stmt_generation (vinfo, stmt_info,
10421 new_stmt, gsi);
10426 /* If we decided to apply a loop mask to the result of the vector
10427 comparison, AND the comparison with the mask now. Later passes
 10428 should then be able to reuse the AND results between multiple
10429 vector statements.
10431 For example:
10432 for (int i = 0; i < 100; ++i)
10433 x[i] = y[i] ? z[i] : 10;
10435 results in following optimized GIMPLE:
10437 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10438 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10439 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10440 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10441 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10442 vect_iftmp.11_47, { 10, ... }>;
 10444 instead of using masked and unmasked forms of
10445 vec != { 0, ... } (masked in the MASK_LOAD,
10446 unmasked in the VEC_COND_EXPR). */
10448 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10449 in cases where that's necessary. */
10451 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10453 if (!is_gimple_val (vec_compare))
10455 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10456 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10457 vec_compare);
10458 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10459 vec_compare = vec_compare_name;
10462 if (must_invert_cmp_result)
10464 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10465 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10466 BIT_NOT_EXPR,
10467 vec_compare);
10468 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10469 vec_compare = vec_compare_name;
10472 if (masks)
10474 tree loop_mask
10475 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10476 vectype, i);
10477 tree tmp2 = make_ssa_name (vec_cmp_type);
10478 gassign *g
10479 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10480 loop_mask);
10481 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10482 vec_compare = tmp2;
10486 gimple *new_stmt;
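/* For an extract-last reduction emit IFN_FOLD_EXTRACT_LAST, which
   yields the last VEC_THEN_CLAUSE element selected by VEC_COMPARE and
   falls back to ELSE_CLAUSE when no element is active; otherwise emit
   a plain VEC_COND_EXPR.  */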
10487 if (reduction_type == EXTRACT_LAST_REDUCTION)
10489 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10490 tree lhs = gimple_get_lhs (old_stmt);
10491 new_stmt = gimple_build_call_internal
10492 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10493 vec_then_clause);
10494 gimple_call_set_lhs (new_stmt, lhs);
10495 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10496 if (old_stmt == gsi_stmt (*gsi))
10497 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10498 else
10500 /* In this case we're moving the definition to later in the
10501 block. That doesn't matter because the only uses of the
10502 lhs are in phi statements. */
10503 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10504 gsi_remove (&old_gsi, true);
10505 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10508 else
10510 new_temp = make_ssa_name (vec_dest);
10511 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10512 vec_then_clause, vec_else_clause);
10513 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10515 if (slp_node)
10516 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10517 else
10518 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10521 if (!slp_node)
10522 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10524 vec_oprnds0.release ();
10525 vec_oprnds1.release ();
10526 vec_oprnds2.release ();
10527 vec_oprnds3.release ();
10529 return true;
10532 /* vectorizable_comparison.
 10534 Check if STMT_INFO is a comparison expression that can be vectorized.
10535 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10536 comparison, put it in VEC_STMT, and insert it at GSI.
10538 Return true if STMT_INFO is vectorizable in this way. */
10540 static bool
10541 vectorizable_comparison (vec_info *vinfo,
10542 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10543 gimple **vec_stmt,
10544 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10546 tree lhs, rhs1, rhs2;
10547 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10548 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10549 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10550 tree new_temp;
10551 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10552 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10553 int ndts = 2;
10554 poly_uint64 nunits;
10555 int ncopies;
10556 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10557 int i;
10558 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10559 vec<tree> vec_oprnds0 = vNULL;
10560 vec<tree> vec_oprnds1 = vNULL;
10561 tree mask_type;
10562 tree mask;
10564 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10565 return false;
10567 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10568 return false;
10570 mask_type = vectype;
10571 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10573 if (slp_node)
10574 ncopies = 1;
10575 else
10576 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10578 gcc_assert (ncopies >= 1);
10579 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10580 return false;
10582 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10583 if (!stmt)
10584 return false;
10586 code = gimple_assign_rhs_code (stmt);
10588 if (TREE_CODE_CLASS (code) != tcc_comparison)
10589 return false;
10591 slp_tree slp_rhs1, slp_rhs2;
10592 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10593 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10594 return false;
10596 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10597 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10598 return false;
10600 if (vectype1 && vectype2
10601 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10602 TYPE_VECTOR_SUBPARTS (vectype2)))
10603 return false;
10605 vectype = vectype1 ? vectype1 : vectype2;
10607 /* Invariant comparison. */
10608 if (!vectype)
10610 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10611 vectype = mask_type;
10612 else
10613 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10614 slp_node);
10615 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10616 return false;
10618 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10619 return false;
10621 /* Can't compare mask and non-mask types. */
10622 if (vectype1 && vectype2
10623 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10624 return false;
10626 /* Boolean values may have another representation in vectors
10627 and therefore we prefer bit operations over comparison for
10628 them (which also works for scalar masks). We store opcodes
10629 to use in bitop1 and bitop2. Statement is vectorized as
10630 BITOP2 (rhs1 BITOP1 rhs2) or
10631 rhs1 BITOP2 (BITOP1 rhs2)
10632 depending on bitop1 and bitop2 arity. */
10633 bool swap_p = false;
10634 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10636 if (code == GT_EXPR)
10638 bitop1 = BIT_NOT_EXPR;
10639 bitop2 = BIT_AND_EXPR;
10641 else if (code == GE_EXPR)
10643 bitop1 = BIT_NOT_EXPR;
10644 bitop2 = BIT_IOR_EXPR;
10646 else if (code == LT_EXPR)
10648 bitop1 = BIT_NOT_EXPR;
10649 bitop2 = BIT_AND_EXPR;
10650 swap_p = true;
10652 else if (code == LE_EXPR)
10654 bitop1 = BIT_NOT_EXPR;
10655 bitop2 = BIT_IOR_EXPR;
10656 swap_p = true;
10658 else
10660 bitop1 = BIT_XOR_EXPR;
10661 if (code == EQ_EXPR)
10662 bitop2 = BIT_NOT_EXPR;
10666 if (!vec_stmt)
10668 if (bitop1 == NOP_EXPR)
10670 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10671 return false;
10673 else
10675 machine_mode mode = TYPE_MODE (vectype);
10676 optab optab;
10678 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10679 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10680 return false;
10682 if (bitop2 != NOP_EXPR)
10684 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10685 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10686 return false;
10690 /* Put types on constant and invariant SLP children. */
10691 if (slp_node
10692 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10693 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10695 if (dump_enabled_p ())
10696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10697 "incompatible vector types for invariants\n");
10698 return false;
10701 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10702 vect_model_simple_cost (vinfo, stmt_info,
10703 ncopies * (1 + (bitop2 != NOP_EXPR)),
10704 dts, ndts, slp_node, cost_vec);
10705 return true;
10708 /* Transform. */
10710 /* Handle def. */
10711 lhs = gimple_assign_lhs (stmt);
10712 mask = vect_create_destination_var (lhs, mask_type);
10714 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10715 rhs1, &vec_oprnds0, vectype,
10716 rhs2, &vec_oprnds1, vectype);
10717 if (swap_p)
10718 std::swap (vec_oprnds0, vec_oprnds1);
10720 /* Arguments are ready. Create the new vector stmt. */
10721 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10723 gimple *new_stmt;
10724 vec_rhs2 = vec_oprnds1[i];
10726 new_temp = make_ssa_name (mask);
10727 if (bitop1 == NOP_EXPR)
10729 new_stmt = gimple_build_assign (new_temp, code,
10730 vec_rhs1, vec_rhs2);
10731 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10733 else
10735 if (bitop1 == BIT_NOT_EXPR)
10736 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10737 else
10738 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10739 vec_rhs2);
10740 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10741 if (bitop2 != NOP_EXPR)
10743 tree res = make_ssa_name (mask);
10744 if (bitop2 == BIT_NOT_EXPR)
10745 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10746 else
10747 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10748 new_temp);
10749 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10752 if (slp_node)
10753 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10754 else
10755 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10758 if (!slp_node)
10759 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10761 vec_oprnds0.release ();
10762 vec_oprnds1.release ();
10764 return true;
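
/* Illustrative sketch (not from GCC): the boolean-mask lowering that
   vectorizable_comparison above selects via bitop1/bitop2, modelled on
   single bits.  The comparison is rewritten as BITOP2 (rhs1 BITOP1 rhs2)
   or rhs1 BITOP2 (BITOP1 rhs2), so e.g. a > b becomes a & ~b and
   a == b becomes ~(a ^ b).  The function name and the encoding of CODE
   are made up for the example.  */
#if 0
static unsigned
mask_compare_model (char code, unsigned a, unsigned b)
{
  a &= 1;
  b &= 1;
  switch (code)
    {
    case '>': return a & (~b & 1);	  /* bitop1 = NOT, bitop2 = AND.  */
    case 'G': return (a | (~b & 1)) & 1;  /* >=: bitop1 = NOT, bitop2 = IOR.  */
    case '<': return b & (~a & 1);	  /* as > with operands swapped.  */
    case 'L': return (b | (~a & 1)) & 1;  /* <=: as >= with operands swapped.  */
    case '=': return (~(a ^ b)) & 1;	  /* bitop1 = XOR, bitop2 = NOT.  */
    default:  return a ^ b;		  /* !=: bitop1 = XOR only.  */
    }
}
#endif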
10767 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10768 can handle all live statements in the node. Otherwise return true
10769 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10770 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10772 static bool
10773 can_vectorize_live_stmts (vec_info *vinfo,
10774 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10775 slp_tree slp_node, slp_instance slp_node_instance,
10776 bool vec_stmt_p,
10777 stmt_vector_for_cost *cost_vec)
10779 if (slp_node)
10781 stmt_vec_info slp_stmt_info;
10782 unsigned int i;
10783 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10785 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10786 && !vectorizable_live_operation (vinfo,
10787 slp_stmt_info, gsi, slp_node,
10788 slp_node_instance, i,
10789 vec_stmt_p, cost_vec))
10790 return false;
10793 else if (STMT_VINFO_LIVE_P (stmt_info)
10794 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10795 slp_node, slp_node_instance, -1,
10796 vec_stmt_p, cost_vec))
10797 return false;
10799 return true;
10802 /* Make sure the statement is vectorizable. */
10804 opt_result
10805 vect_analyze_stmt (vec_info *vinfo,
10806 stmt_vec_info stmt_info, bool *need_to_vectorize,
10807 slp_tree node, slp_instance node_instance,
10808 stmt_vector_for_cost *cost_vec)
10810 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10811 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10812 bool ok;
10813 gimple_seq pattern_def_seq;
10815 if (dump_enabled_p ())
10816 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10817 stmt_info->stmt);
10819 if (gimple_has_volatile_ops (stmt_info->stmt))
10820 return opt_result::failure_at (stmt_info->stmt,
10821 "not vectorized:"
10822 " stmt has volatile operands: %G\n",
10823 stmt_info->stmt);
10825 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10826 && node == NULL
10827 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10829 gimple_stmt_iterator si;
10831 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10833 stmt_vec_info pattern_def_stmt_info
10834 = vinfo->lookup_stmt (gsi_stmt (si));
10835 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10836 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10838 /* Analyze def stmt of STMT if it's a pattern stmt. */
10839 if (dump_enabled_p ())
10840 dump_printf_loc (MSG_NOTE, vect_location,
10841 "==> examining pattern def statement: %G",
10842 pattern_def_stmt_info->stmt);
10844 opt_result res
10845 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10846 need_to_vectorize, node, node_instance,
10847 cost_vec);
10848 if (!res)
10849 return res;
10854 /* Skip stmts that do not need to be vectorized. In loops this is expected
10855 to include:
10856 - the COND_EXPR which is the loop exit condition
10857 - any LABEL_EXPRs in the loop
10858 - computations that are used only for array indexing or loop control.
10859 In basic blocks we only analyze statements that are a part of some SLP
10860 instance, therefore, all the statements are relevant.
10862 Pattern statement needs to be analyzed instead of the original statement
10863 if the original statement is not relevant. Otherwise, we analyze both
10864 statements. In basic blocks we are called from some SLP instance
10865 traversal; don't analyze pattern stmts here, as the pattern stmts
10866 will already be part of an SLP instance. */
10868 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10869 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10870 && !STMT_VINFO_LIVE_P (stmt_info))
10872 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10873 && pattern_stmt_info
10874 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10875 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10877 /* Analyze PATTERN_STMT instead of the original stmt. */
10878 stmt_info = pattern_stmt_info;
10879 if (dump_enabled_p ())
10880 dump_printf_loc (MSG_NOTE, vect_location,
10881 "==> examining pattern statement: %G",
10882 stmt_info->stmt);
10884 else
10886 if (dump_enabled_p ())
10887 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10889 return opt_result::success ();
10892 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10893 && node == NULL
10894 && pattern_stmt_info
10895 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10896 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10898 /* Analyze PATTERN_STMT too. */
10899 if (dump_enabled_p ())
10900 dump_printf_loc (MSG_NOTE, vect_location,
10901 "==> examining pattern statement: %G",
10902 pattern_stmt_info->stmt);
10904 opt_result res
10905 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10906 node_instance, cost_vec);
10907 if (!res)
10908 return res;
10911 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10913 case vect_internal_def:
10914 break;
10916 case vect_reduction_def:
10917 case vect_nested_cycle:
10918 gcc_assert (!bb_vinfo
10919 && (relevance == vect_used_in_outer
10920 || relevance == vect_used_in_outer_by_reduction
10921 || relevance == vect_used_by_reduction
10922 || relevance == vect_unused_in_scope
10923 || relevance == vect_used_only_live));
10924 break;
10926 case vect_induction_def:
10927 gcc_assert (!bb_vinfo);
10928 break;
10930 case vect_constant_def:
10931 case vect_external_def:
10932 case vect_unknown_def_type:
10933 default:
10934 gcc_unreachable ();
10937 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
10938 if (node)
10939 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
10941 if (STMT_VINFO_RELEVANT_P (stmt_info))
10943 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10944 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10945 || (call && gimple_call_lhs (call) == NULL_TREE));
10946 *need_to_vectorize = true;
10949 if (PURE_SLP_STMT (stmt_info) && !node)
10951 if (dump_enabled_p ())
10952 dump_printf_loc (MSG_NOTE, vect_location,
10953 "handled only by SLP analysis\n");
10954 return opt_result::success ();
10957 ok = true;
10958 if (!bb_vinfo
10959 && (STMT_VINFO_RELEVANT_P (stmt_info)
10960 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10961 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10962 -mveclibabi= takes preference over library functions with
10963 the simd attribute. */
10964 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10965 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10966 cost_vec)
10967 || vectorizable_conversion (vinfo, stmt_info,
10968 NULL, NULL, node, cost_vec)
10969 || vectorizable_operation (vinfo, stmt_info,
10970 NULL, NULL, node, cost_vec)
10971 || vectorizable_assignment (vinfo, stmt_info,
10972 NULL, NULL, node, cost_vec)
10973 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10974 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10975 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10976 node, node_instance, cost_vec)
10977 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10978 NULL, node, cost_vec)
10979 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10980 || vectorizable_condition (vinfo, stmt_info,
10981 NULL, NULL, node, cost_vec)
10982 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10983 cost_vec)
10984 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10985 stmt_info, NULL, node));
10986 else
10988 if (bb_vinfo)
10989 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10990 || vectorizable_simd_clone_call (vinfo, stmt_info,
10991 NULL, NULL, node, cost_vec)
10992 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10993 cost_vec)
10994 || vectorizable_shift (vinfo, stmt_info,
10995 NULL, NULL, node, cost_vec)
10996 || vectorizable_operation (vinfo, stmt_info,
10997 NULL, NULL, node, cost_vec)
10998 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10999 cost_vec)
11000 || vectorizable_load (vinfo, stmt_info,
11001 NULL, NULL, node, cost_vec)
11002 || vectorizable_store (vinfo, stmt_info,
11003 NULL, NULL, node, cost_vec)
11004 || vectorizable_condition (vinfo, stmt_info,
11005 NULL, NULL, node, cost_vec)
11006 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11007 cost_vec)
11008 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11011 if (node)
11012 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11014 if (!ok)
11015 return opt_result::failure_at (stmt_info->stmt,
11016 "not vectorized:"
11017 " relevant stmt not supported: %G",
11018 stmt_info->stmt);
11020 /* Stmts that are (also) "live" (i.e. used outside of the loop)
11021 need extra handling, except for vectorizable reductions. */
11022 if (!bb_vinfo
11023 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11024 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11025 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11026 stmt_info, NULL, node, node_instance,
11027 false, cost_vec))
11028 return opt_result::failure_at (stmt_info->stmt,
11029 "not vectorized:"
11030 " live stmt not supported: %G",
11031 stmt_info->stmt);
11033 return opt_result::success ();
11037 /* Function vect_transform_stmt.
11039 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11041 bool
11042 vect_transform_stmt (vec_info *vinfo,
11043 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11044 slp_tree slp_node, slp_instance slp_node_instance)
11046 bool is_store = false;
11047 gimple *vec_stmt = NULL;
11048 bool done;
11050 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11052 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11053 if (slp_node)
11054 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11056 switch (STMT_VINFO_TYPE (stmt_info))
11058 case type_demotion_vec_info_type:
11059 case type_promotion_vec_info_type:
11060 case type_conversion_vec_info_type:
11061 done = vectorizable_conversion (vinfo, stmt_info,
11062 gsi, &vec_stmt, slp_node, NULL);
11063 gcc_assert (done);
11064 break;
11066 case induc_vec_info_type:
11067 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11068 stmt_info, &vec_stmt, slp_node,
11069 NULL);
11070 gcc_assert (done);
11071 break;
11073 case shift_vec_info_type:
11074 done = vectorizable_shift (vinfo, stmt_info,
11075 gsi, &vec_stmt, slp_node, NULL);
11076 gcc_assert (done);
11077 break;
11079 case op_vec_info_type:
11080 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11081 NULL);
11082 gcc_assert (done);
11083 break;
11085 case assignment_vec_info_type:
11086 done = vectorizable_assignment (vinfo, stmt_info,
11087 gsi, &vec_stmt, slp_node, NULL);
11088 gcc_assert (done);
11089 break;
11091 case load_vec_info_type:
11092 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11093 NULL);
11094 gcc_assert (done);
11095 break;
11097 case store_vec_info_type:
11098 done = vectorizable_store (vinfo, stmt_info,
11099 gsi, &vec_stmt, slp_node, NULL);
11100 gcc_assert (done);
11101 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11103 /* In case of interleaving, the whole chain is vectorized when the
11104 last store in the chain is reached. Store stmts before the last
11105 one are skipped, and their vec_stmt_info shouldn't be freed
11106 meanwhile. */
11107 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11108 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11109 is_store = true;
11111 else
11112 is_store = true;
11113 break;
11115 case condition_vec_info_type:
11116 done = vectorizable_condition (vinfo, stmt_info,
11117 gsi, &vec_stmt, slp_node, NULL);
11118 gcc_assert (done);
11119 break;
11121 case comparison_vec_info_type:
11122 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11123 slp_node, NULL);
11124 gcc_assert (done);
11125 break;
11127 case call_vec_info_type:
11128 done = vectorizable_call (vinfo, stmt_info,
11129 gsi, &vec_stmt, slp_node, NULL);
11130 break;
11132 case call_simd_clone_vec_info_type:
11133 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11134 slp_node, NULL);
11135 break;
11137 case reduc_vec_info_type:
11138 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11139 gsi, &vec_stmt, slp_node);
11140 gcc_assert (done);
11141 break;
11143 case cycle_phi_info_type:
11144 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11145 &vec_stmt, slp_node, slp_node_instance);
11146 gcc_assert (done);
11147 break;
11149 case lc_phi_info_type:
11150 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11151 stmt_info, &vec_stmt, slp_node);
11152 gcc_assert (done);
11153 break;
11155 case phi_info_type:
11156 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11157 gcc_assert (done);
11158 break;
11160 default:
11161 if (!STMT_VINFO_LIVE_P (stmt_info))
11163 if (dump_enabled_p ())
11164 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11165 "stmt not supported.\n");
11166 gcc_unreachable ();
11168 done = true;
11171 if (!slp_node && vec_stmt)
11172 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11174 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11176 /* Handle stmts whose DEF is used outside the loop-nest that is
11177 being vectorized. */
11178 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11179 slp_node_instance, true, NULL);
11180 gcc_assert (done);
11183 if (slp_node)
11184 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11186 return is_store;
11190 /* Remove a group of stores (for SLP or interleaving), free their
11191 stmt_vec_info. */
11193 void
11194 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11196 stmt_vec_info next_stmt_info = first_stmt_info;
11198 while (next_stmt_info)
11200 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11201 next_stmt_info = vect_orig_stmt (next_stmt_info);
11202 /* Free the attached stmt_vec_info and remove the stmt. */
11203 vinfo->remove_stmt (next_stmt_info);
11204 next_stmt_info = tmp;
11208 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11209 elements of type SCALAR_TYPE, or null if the target doesn't support
11210 such a type.
11212 If NUNITS is zero, return a vector type that contains elements of
11213 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11215 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11216 for this vectorization region and want to "autodetect" the best choice.
11217 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11218 and we want the new type to be interoperable with it. PREVAILING_MODE
11219 in this case can be a scalar integer mode or a vector mode; when it
11220 is a vector mode, the function acts like a tree-level version of
11221 related_vector_mode. */
11223 tree
11224 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11225 tree scalar_type, poly_uint64 nunits)
11227 tree orig_scalar_type = scalar_type;
11228 scalar_mode inner_mode;
11229 machine_mode simd_mode;
11230 tree vectype;
11232 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11233 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11234 return NULL_TREE;
11236 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11238 /* For vector types of elements whose mode precision doesn't
11239 match their type's precision we use an element type of mode
11240 precision. The vectorization routines will have to make sure
11241 they support the proper result truncation/extension.
11242 We also make sure to build vector types with INTEGER_TYPE
11243 component type only. */
11244 if (INTEGRAL_TYPE_P (scalar_type)
11245 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11246 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11247 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11248 TYPE_UNSIGNED (scalar_type));
11250 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11251 When the component mode passes the above test simply use a type
11252 corresponding to that mode. The theory is that any use that
11253 would cause problems with this will disable vectorization anyway. */
11254 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11255 && !INTEGRAL_TYPE_P (scalar_type))
11256 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11258 /* We can't build a vector type of elements with alignment bigger than
11259 their size. */
11260 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11261 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11262 TYPE_UNSIGNED (scalar_type));
11264 /* If we fell back to using the mode, fail if there was
11265 no scalar type for it. */
11266 if (scalar_type == NULL_TREE)
11267 return NULL_TREE;
11269 /* If no prevailing mode was supplied, use the mode the target prefers.
11270 Otherwise lookup a vector mode based on the prevailing mode. */
11271 if (prevailing_mode == VOIDmode)
11273 gcc_assert (known_eq (nunits, 0U));
11274 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11275 if (SCALAR_INT_MODE_P (simd_mode))
11277 /* Traditional behavior is not to take the integer mode
11278 literally, but simply to use it as a way of determining
11279 the vector size. It is up to mode_for_vector to decide
11280 what the TYPE_MODE should be.
11282 Note that nunits == 1 is allowed in order to support single
11283 element vector types. */
11284 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11285 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11286 return NULL_TREE;
11289 else if (SCALAR_INT_MODE_P (prevailing_mode)
11290 || !related_vector_mode (prevailing_mode,
11291 inner_mode, nunits).exists (&simd_mode))
11293 /* Fall back to using mode_for_vector, mostly in the hope of being
11294 able to use an integer mode. */
11295 if (known_eq (nunits, 0U)
11296 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11297 return NULL_TREE;
11299 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11300 return NULL_TREE;
11303 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11305 /* In cases where the mode was chosen by mode_for_vector, check that
11306 the target actually supports the chosen mode, or that it at least
11307 allows the vector mode to be replaced by a like-sized integer. */
11308 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11309 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11310 return NULL_TREE;
11312 /* Re-attach the address-space qualifier if we canonicalized the scalar
11313 type. */
11314 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11315 return build_qualified_type
11316 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11318 return vectype;
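
/* Illustrative usage sketch (not compiled): the two query styles described
   in the comment above.  The enclosing function and the PREVAILING variable
   are hypothetical; integer_type_node and VOIDmode are the usual GCC
   globals.  */
#if 0
static void
example_vectype_queries (machine_mode prevailing)
{
  /* No prevailing mode yet: NUNITS must be zero and the target's preferred
     vector size for "int" is autodetected.  */
  tree natural = get_related_vectype_for_scalar_type (VOIDmode,
						      integer_type_node, 0);
  /* A vector mode was already chosen for the region: ask for a compatible
     vector with exactly four "int" elements.  */
  tree four = get_related_vectype_for_scalar_type (prevailing,
						   integer_type_node, 4);
  (void) natural;
  (void) four;
}
#endif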
11321 /* Function get_vectype_for_scalar_type.
11323 Returns the vector type corresponding to SCALAR_TYPE as supported
11324 by the target. If GROUP_SIZE is nonzero and we're performing BB
11325 vectorization, make sure that the number of elements in the vector
11326 is no bigger than GROUP_SIZE. */
11328 tree
11329 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11330 unsigned int group_size)
11332 /* For BB vectorization, we should always have a group size once we've
11333 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11334 are tentative requests during things like early data reference
11335 analysis and pattern recognition. */
11336 if (is_a <bb_vec_info> (vinfo))
11337 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11338 else
11339 group_size = 0;
11341 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11342 scalar_type);
11343 if (vectype && vinfo->vector_mode == VOIDmode)
11344 vinfo->vector_mode = TYPE_MODE (vectype);
11346 /* Register the natural choice of vector type, before the group size
11347 has been applied. */
11348 if (vectype)
11349 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11351 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11352 try again with an explicit number of elements. */
11353 if (vectype
11354 && group_size
11355 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11357 /* Start with the biggest number of units that fits within
11358 GROUP_SIZE and halve it until we find a valid vector type.
11359 Usually either the first attempt will succeed or all will
11360 fail (in the latter case because GROUP_SIZE is too small
11361 for the target), but it's possible that a target could have
11362 a hole between supported vector types.
11364 If GROUP_SIZE is not a power of 2, this has the effect of
11365 trying the largest power of 2 that fits within the group,
11366 even though the group is not a multiple of that vector size.
11367 The BB vectorizer will then try to carve up the group into
11368 smaller pieces. */
11369 unsigned int nunits = 1 << floor_log2 (group_size);
11372 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11373 scalar_type, nunits);
11374 nunits /= 2;
11376 while (nunits > 1 && !vectype);
11379 return vectype;
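
/* Illustrative sketch (not from GCC) of the capping loop above: starting
   from the largest power of two no bigger than GROUP_SIZE, the candidate
   element count is halved until the target accepts a vector type.
   TARGET_ACCEPTS_NUNITS_P is a hypothetical stand-in for
   get_related_vectype_for_scalar_type returning non-null.  */
#if 0
static unsigned
capped_nunits_model (unsigned group_size)
{
  unsigned nunits = 1;
  while (nunits * 2 <= group_size)	/* 1 << floor_log2 (group_size).  */
    nunits *= 2;
  unsigned chosen = 0;
  do
    {
      if (TARGET_ACCEPTS_NUNITS_P (nunits))	/* hypothetical predicate */
	chosen = nunits;
      nunits /= 2;
    }
  while (nunits > 1 && !chosen);
  return chosen;			/* 0 if nothing was accepted.  */
}
#endif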
11382 /* Return the vector type corresponding to SCALAR_TYPE as supported
11383 by the target. NODE, if nonnull, is the SLP tree node that will
11384 use the returned vector type. */
11386 tree
11387 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11389 unsigned int group_size = 0;
11390 if (node)
11391 group_size = SLP_TREE_LANES (node);
11392 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11395 /* Function get_mask_type_for_scalar_type.
11397 Returns the mask type corresponding to a result of comparison
11398 of vectors of specified SCALAR_TYPE as supported by target.
11399 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11400 make sure that the number of elements in the vector is no bigger
11401 than GROUP_SIZE. */
11403 tree
11404 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11405 unsigned int group_size)
11407 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11409 if (!vectype)
11410 return NULL;
11412 return truth_type_for (vectype);
11415 /* Function get_same_sized_vectype
11417 Returns a vector type corresponding to SCALAR_TYPE with the same
11418 overall size as VECTOR_TYPE, if supported by the target. */
11420 tree
11421 get_same_sized_vectype (tree scalar_type, tree vector_type)
11423 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11424 return truth_type_for (vector_type);
11426 poly_uint64 nunits;
11427 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11428 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11429 return NULL_TREE;
11431 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11432 scalar_type, nunits);
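
/* Illustrative arithmetic (not from GCC): get_same_sized_vectype keeps the
   overall vector size fixed, so the requested element count is simply the
   vector size divided by the scalar size, e.g. a 16-byte vector with a
   4-byte scalar gives 4 lanes; this mirrors the multiple_p computation
   above.  */
#if 0
static unsigned
same_sized_nunits_model (unsigned vector_bytes, unsigned scalar_bytes)
{
  if (scalar_bytes == 0 || vector_bytes % scalar_bytes != 0)
    return 0;				/* Non-divisible sizes fail.  */
  return vector_bytes / scalar_bytes;
}
#endif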
11435 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11436 would not change the chosen vector modes. */
11438 bool
11439 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11441 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11442 i != vinfo->used_vector_modes.end (); ++i)
11443 if (!VECTOR_MODE_P (*i)
11444 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11445 return false;
11446 return true;
11449 /* Function vect_is_simple_use.
11451 Input:
11452 VINFO - the vect info of the loop or basic block that is being vectorized.
11453 OPERAND - operand in the loop or bb.
11454 Output:
11455 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11456 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11457 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11458 the definition could be anywhere in the function
11459 DT - the type of definition
11461 Returns whether a stmt with OPERAND can be vectorized.
11462 For loops, supportable operands are constants, loop invariants, and operands
11463 that are defined by the current iteration of the loop. Unsupportable
11464 operands are those that are defined by a previous iteration of the loop (as
11465 is the case in reduction/induction computations).
11466 For basic blocks, supportable operands are constants and bb invariants.
11467 For now, operands defined outside the basic block are not supported. */
11469 bool
11470 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11471 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11473 if (def_stmt_info_out)
11474 *def_stmt_info_out = NULL;
11475 if (def_stmt_out)
11476 *def_stmt_out = NULL;
11477 *dt = vect_unknown_def_type;
11479 if (dump_enabled_p ())
11481 dump_printf_loc (MSG_NOTE, vect_location,
11482 "vect_is_simple_use: operand ");
11483 if (TREE_CODE (operand) == SSA_NAME
11484 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11485 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11486 else
11487 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11490 if (CONSTANT_CLASS_P (operand))
11491 *dt = vect_constant_def;
11492 else if (is_gimple_min_invariant (operand))
11493 *dt = vect_external_def;
11494 else if (TREE_CODE (operand) != SSA_NAME)
11495 *dt = vect_unknown_def_type;
11496 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11497 *dt = vect_external_def;
11498 else
11500 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11501 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11502 if (!stmt_vinfo)
11503 *dt = vect_external_def;
11504 else
11506 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11507 def_stmt = stmt_vinfo->stmt;
11508 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11509 if (def_stmt_info_out)
11510 *def_stmt_info_out = stmt_vinfo;
11512 if (def_stmt_out)
11513 *def_stmt_out = def_stmt;
11516 if (dump_enabled_p ())
11518 dump_printf (MSG_NOTE, ", type of def: ");
11519 switch (*dt)
11521 case vect_uninitialized_def:
11522 dump_printf (MSG_NOTE, "uninitialized\n");
11523 break;
11524 case vect_constant_def:
11525 dump_printf (MSG_NOTE, "constant\n");
11526 break;
11527 case vect_external_def:
11528 dump_printf (MSG_NOTE, "external\n");
11529 break;
11530 case vect_internal_def:
11531 dump_printf (MSG_NOTE, "internal\n");
11532 break;
11533 case vect_induction_def:
11534 dump_printf (MSG_NOTE, "induction\n");
11535 break;
11536 case vect_reduction_def:
11537 dump_printf (MSG_NOTE, "reduction\n");
11538 break;
11539 case vect_double_reduction_def:
11540 dump_printf (MSG_NOTE, "double reduction\n");
11541 break;
11542 case vect_nested_cycle:
11543 dump_printf (MSG_NOTE, "nested cycle\n");
11544 break;
11545 case vect_unknown_def_type:
11546 dump_printf (MSG_NOTE, "unknown\n");
11547 break;
11551 if (*dt == vect_unknown_def_type)
11553 if (dump_enabled_p ())
11554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11555 "Unsupported pattern.\n");
11556 return false;
11559 return true;
11562 /* Function vect_is_simple_use.
11564 Same as vect_is_simple_use but also determines the vector operand
11565 type of OPERAND and stores it to *VECTYPE. If the definition of
11566 OPERAND is vect_uninitialized_def, vect_constant_def or
11567 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11568 is responsible for computing the best suited vector type for the
11569 scalar operand. */
11571 bool
11572 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11573 tree *vectype, stmt_vec_info *def_stmt_info_out,
11574 gimple **def_stmt_out)
11576 stmt_vec_info def_stmt_info;
11577 gimple *def_stmt;
11578 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11579 return false;
11581 if (def_stmt_out)
11582 *def_stmt_out = def_stmt;
11583 if (def_stmt_info_out)
11584 *def_stmt_info_out = def_stmt_info;
11586 /* Now get a vector type if the def is internal, otherwise supply
11587 NULL_TREE and leave it up to the caller to figure out a proper
11588 type for the use stmt. */
11589 if (*dt == vect_internal_def
11590 || *dt == vect_induction_def
11591 || *dt == vect_reduction_def
11592 || *dt == vect_double_reduction_def
11593 || *dt == vect_nested_cycle)
11595 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11596 gcc_assert (*vectype != NULL_TREE);
11597 if (dump_enabled_p ())
11598 dump_printf_loc (MSG_NOTE, vect_location,
11599 "vect_is_simple_use: vectype %T\n", *vectype);
11601 else if (*dt == vect_uninitialized_def
11602 || *dt == vect_constant_def
11603 || *dt == vect_external_def)
11604 *vectype = NULL_TREE;
11605 else
11606 gcc_unreachable ();
11608 return true;
11611 /* Function vect_is_simple_use.
11613 Same as vect_is_simple_use but determines the operand by operand
11614 position OPERAND from either STMT or SLP_NODE, filling in *OP
11615 and *SLP_DEF (when SLP_NODE is not NULL). */
11617 bool
11618 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11619 unsigned operand, tree *op, slp_tree *slp_def,
11620 enum vect_def_type *dt,
11621 tree *vectype, stmt_vec_info *def_stmt_info_out)
11623 if (slp_node)
11625 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11626 *slp_def = child;
11627 *vectype = SLP_TREE_VECTYPE (child);
11628 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11630 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11631 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11633 else
11635 if (def_stmt_info_out)
11636 *def_stmt_info_out = NULL;
11637 *op = SLP_TREE_SCALAR_OPS (child)[0];
11638 *dt = SLP_TREE_DEF_TYPE (child);
11639 return true;
11642 else
11644 *slp_def = NULL;
11645 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11647 if (gimple_assign_rhs_code (ass) == COND_EXPR
11648 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11650 if (operand < 2)
11651 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11652 else
11653 *op = gimple_op (ass, operand);
11655 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11656 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11657 else
11658 *op = gimple_op (ass, operand + 1);
11660 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11661 *op = gimple_call_arg (call, operand);
11662 else
11663 gcc_unreachable ();
11664 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11668 /* If OP is not NULL and is external or constant update its vector
11669 type with VECTYPE. Returns true if successful or false if not,
11670 for example when conflicting vector types are present. */
11672 bool
11673 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11675 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11676 return true;
11677 if (SLP_TREE_VECTYPE (op))
11678 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11679 SLP_TREE_VECTYPE (op) = vectype;
11680 return true;
11683 /* Function supportable_widening_operation
11685 Check whether an operation represented by the code CODE is a
11686 widening operation that is supported by the target platform in
11687 vector form (i.e., when operating on arguments of type VECTYPE_IN
11688 producing a result of type VECTYPE_OUT).
11690 Widening operations we currently support are NOP (CONVERT), FLOAT,
11691 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11692 are supported by the target platform either directly (via vector
11693 tree-codes), or via target builtins.
11695 Output:
11696 - CODE1 and CODE2 are codes of vector operations to be used when
11697 vectorizing the operation, if available.
11698 - MULTI_STEP_CVT determines the number of required intermediate steps in
11699 case of multi-step conversion (like char->short->int - in that case
11700 MULTI_STEP_CVT will be 1).
11701 - INTERM_TYPES contains the intermediate type required to perform the
11702 widening operation (short in the above example). */
11704 bool
11705 supportable_widening_operation (vec_info *vinfo,
11706 enum tree_code code, stmt_vec_info stmt_info,
11707 tree vectype_out, tree vectype_in,
11708 enum tree_code *code1, enum tree_code *code2,
11709 int *multi_step_cvt,
11710 vec<tree> *interm_types)
11712 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11713 class loop *vect_loop = NULL;
11714 machine_mode vec_mode;
11715 enum insn_code icode1, icode2;
11716 optab optab1, optab2;
11717 tree vectype = vectype_in;
11718 tree wide_vectype = vectype_out;
11719 enum tree_code c1, c2;
11720 int i;
11721 tree prev_type, intermediate_type;
11722 machine_mode intermediate_mode, prev_mode;
11723 optab optab3, optab4;
11725 *multi_step_cvt = 0;
11726 if (loop_info)
11727 vect_loop = LOOP_VINFO_LOOP (loop_info);
11729 switch (code)
11731 case WIDEN_MULT_EXPR:
11732 /* The result of a vectorized widening operation usually requires
11733 two vectors (because the widened results do not fit into one vector).
11734 The generated vector results would normally be expected to be
11735 generated in the same order as in the original scalar computation,
11736 i.e. if 8 results are generated in each vector iteration, they are
11737 to be organized as follows:
11738 vect1: [res1,res2,res3,res4],
11739 vect2: [res5,res6,res7,res8].
11741 However, in the special case that the result of the widening
11742 operation is used in a reduction computation only, the order doesn't
11743 matter (because when vectorizing a reduction we change the order of
11744 the computation). Some targets can take advantage of this and
11745 generate more efficient code. For example, targets like Altivec,
11746 that support widen_mult using a sequence of {mult_even,mult_odd}
11747 generate the following vectors:
11748 vect1: [res1,res3,res5,res7],
11749 vect2: [res2,res4,res6,res8].
11751 When vectorizing outer-loops, we execute the inner-loop sequentially
11752 (each vectorized inner-loop iteration contributes to VF outer-loop
11753 iterations in parallel). We therefore don't allow changing the
11754 order of the computation in the inner-loop during outer-loop
11755 vectorization. */
11756 /* TODO: Another case in which order doesn't *really* matter is when we
11757 widen and then contract again, e.g. (short)((int)x * y >> 8).
11758 Normally, pack_trunc performs an even/odd permute, whereas the
11759 repack from an even/odd expansion would be an interleave, which
11760 would be significantly simpler for e.g. AVX2. */
11761 /* In any case, in order to avoid duplicating the code below, recurse
11762 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11763 are properly set up for the caller. If we fail, we'll continue with
11764 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11765 if (vect_loop
11766 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11767 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11768 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11769 stmt_info, vectype_out,
11770 vectype_in, code1, code2,
11771 multi_step_cvt, interm_types))
11773 /* Elements in a vector with vect_used_by_reduction property cannot
11774 be reordered if the use chain with this property does not have the
11775 same operation. One such example is s += a * b, where elements
11776 in a and b cannot be reordered. Here we check if the vector defined
11777 by STMT is only directly used in the reduction statement. */
11778 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11779 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11780 if (use_stmt_info
11781 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11782 return true;
11784 c1 = VEC_WIDEN_MULT_LO_EXPR;
11785 c2 = VEC_WIDEN_MULT_HI_EXPR;
11786 break;
11788 case DOT_PROD_EXPR:
11789 c1 = DOT_PROD_EXPR;
11790 c2 = DOT_PROD_EXPR;
11791 break;
11793 case SAD_EXPR:
11794 c1 = SAD_EXPR;
11795 c2 = SAD_EXPR;
11796 break;
11798 case VEC_WIDEN_MULT_EVEN_EXPR:
11799 /* Support the recursion induced just above. */
11800 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11801 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11802 break;
11804 case WIDEN_LSHIFT_EXPR:
11805 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11806 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11807 break;
11809 case WIDEN_PLUS_EXPR:
11810 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11811 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11812 break;
11814 case WIDEN_MINUS_EXPR:
11815 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11816 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11817 break;
11819 CASE_CONVERT:
11820 c1 = VEC_UNPACK_LO_EXPR;
11821 c2 = VEC_UNPACK_HI_EXPR;
11822 break;
11824 case FLOAT_EXPR:
11825 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11826 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11827 break;
11829 case FIX_TRUNC_EXPR:
11830 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11831 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11832 break;
11834 default:
11835 gcc_unreachable ();
11838 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11839 std::swap (c1, c2);
11841 if (code == FIX_TRUNC_EXPR)
11843 /* The signedness is determined from output operand. */
11844 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11845 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11847 else if (CONVERT_EXPR_CODE_P (code)
11848 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11849 && VECTOR_BOOLEAN_TYPE_P (vectype)
11850 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11851 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11853 /* If the input and result modes are the same, a different optab
11854 is needed where we pass in the number of units in vectype. */
11855 optab1 = vec_unpacks_sbool_lo_optab;
11856 optab2 = vec_unpacks_sbool_hi_optab;
11858 else
11860 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11861 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11864 if (!optab1 || !optab2)
11865 return false;
11867 vec_mode = TYPE_MODE (vectype);
11868 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11869 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11870 return false;
11872 *code1 = c1;
11873 *code2 = c2;
11875 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11876 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11878 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11879 return true;
11880 /* For scalar masks we may have different boolean
11881 vector types having the same QImode. Thus we
11882 add an additional check on the number of elements. */
11883 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11884 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11885 return true;
11888 /* Check if it's a multi-step conversion that can be done using intermediate
11889 types. */
11891 prev_type = vectype;
11892 prev_mode = vec_mode;
11894 if (!CONVERT_EXPR_CODE_P (code))
11895 return false;
11897 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11898 intermediate steps in the promotion sequence. We try
11899 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11900 not. */
11901 interm_types->create (MAX_INTERM_CVT_STEPS);
11902 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11904 intermediate_mode = insn_data[icode1].operand[0].mode;
11905 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11906 intermediate_type
11907 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11908 else
11909 intermediate_type
11910 = lang_hooks.types.type_for_mode (intermediate_mode,
11911 TYPE_UNSIGNED (prev_type));
11913 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11914 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11915 && intermediate_mode == prev_mode
11916 && SCALAR_INT_MODE_P (prev_mode))
11918 /* If the input and result modes are the same, a different optab
11919 is needed where we pass in the number of units in vectype. */
11920 optab3 = vec_unpacks_sbool_lo_optab;
11921 optab4 = vec_unpacks_sbool_hi_optab;
11923 else
11925 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11926 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11929 if (!optab3 || !optab4
11930 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11931 || insn_data[icode1].operand[0].mode != intermediate_mode
11932 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11933 || insn_data[icode2].operand[0].mode != intermediate_mode
11934 || ((icode1 = optab_handler (optab3, intermediate_mode))
11935 == CODE_FOR_nothing)
11936 || ((icode2 = optab_handler (optab4, intermediate_mode))
11937 == CODE_FOR_nothing))
11938 break;
11940 interm_types->quick_push (intermediate_type);
11941 (*multi_step_cvt)++;
11943 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11944 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11946 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11947 return true;
11948 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11949 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11950 return true;
11953 prev_type = intermediate_type;
11954 prev_mode = intermediate_mode;
11957 interm_types->release ();
11958 return false;
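
/* Illustrative sketch (not from GCC) of the two result orderings discussed
   in the WIDEN_MULT_EXPR comment above, for eight widened results per
   scalar iteration.  The lo/hi scheme preserves the original order, while
   the even/odd scheme (usable when only a reduction consumes the results)
   interleaves them.  */
#if 0
static void
widen_result_orderings (const int res[8],
			int lo[4], int hi[4], int even[4], int odd[4])
{
  for (int i = 0; i < 4; i++)
    {
      lo[i] = res[i];		/* [res1,res2,res3,res4]  */
      hi[i] = res[i + 4];	/* [res5,res6,res7,res8]  */
      even[i] = res[2 * i];	/* [res1,res3,res5,res7]  */
      odd[i] = res[2 * i + 1];	/* [res2,res4,res6,res8]  */
    }
}
#endif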
11962 /* Function supportable_narrowing_operation
11964 Check whether an operation represented by the code CODE is a
11965 narrowing operation that is supported by the target platform in
11966 vector form (i.e., when operating on arguments of type VECTYPE_IN
11967 and producing a result of type VECTYPE_OUT).
11969 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11970 and FLOAT. This function checks if these operations are supported by
11971 the target platform directly via vector tree-codes.
11973 Output:
11974 - CODE1 is the code of a vector operation to be used when
11975 vectorizing the operation, if available.
11976 - MULTI_STEP_CVT determines the number of required intermediate steps in
11977 case of multi-step conversion (like int->short->char - in that case
11978 MULTI_STEP_CVT will be 1).
11979 - INTERM_TYPES contains the intermediate type required to perform the
11980 narrowing operation (short in the above example). */
11982 bool
11983 supportable_narrowing_operation (enum tree_code code,
11984 tree vectype_out, tree vectype_in,
11985 enum tree_code *code1, int *multi_step_cvt,
11986 vec<tree> *interm_types)
11988 machine_mode vec_mode;
11989 enum insn_code icode1;
11990 optab optab1, interm_optab;
11991 tree vectype = vectype_in;
11992 tree narrow_vectype = vectype_out;
11993 enum tree_code c1;
11994 tree intermediate_type, prev_type;
11995 machine_mode intermediate_mode, prev_mode;
11996 int i;
11997 bool uns;
11999 *multi_step_cvt = 0;
12000 switch (code)
12002 CASE_CONVERT:
12003 c1 = VEC_PACK_TRUNC_EXPR;
12004 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12005 && VECTOR_BOOLEAN_TYPE_P (vectype)
12006 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12007 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12008 optab1 = vec_pack_sbool_trunc_optab;
12009 else
12010 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12011 break;
12013 case FIX_TRUNC_EXPR:
12014 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12015 /* The signedness is determined from output operand. */
12016 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12017 break;
12019 case FLOAT_EXPR:
12020 c1 = VEC_PACK_FLOAT_EXPR;
12021 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12022 break;
12024 default:
12025 gcc_unreachable ();
12028 if (!optab1)
12029 return false;
12031 vec_mode = TYPE_MODE (vectype);
12032 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12033 return false;
12035 *code1 = c1;
12037 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12039 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12040 return true;
12041 /* For scalar masks we may have different boolean
12042 vector types having the same QImode. Thus we
12043 add an additional check on the number of elements. */
12044 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12045 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12046 return true;
12049 if (code == FLOAT_EXPR)
12050 return false;
12052 /* Check if it's a multi-step conversion that can be done using intermediate
12053 types. */
12054 prev_mode = vec_mode;
12055 prev_type = vectype;
12056 if (code == FIX_TRUNC_EXPR)
12057 uns = TYPE_UNSIGNED (vectype_out);
12058 else
12059 uns = TYPE_UNSIGNED (vectype);
12061 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12062 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12063 costly than signed. */
12064 if (code == FIX_TRUNC_EXPR && uns)
12066 enum insn_code icode2;
12068 intermediate_type
12069 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12070 interm_optab
12071 = optab_for_tree_code (c1, intermediate_type, optab_default);
12072 if (interm_optab != unknown_optab
12073 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12074 && insn_data[icode1].operand[0].mode
12075 == insn_data[icode2].operand[0].mode)
12077 uns = false;
12078 optab1 = interm_optab;
12079 icode1 = icode2;
12083 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12084 intermediate steps in the narrowing sequence. We try
12085 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12086 interm_types->create (MAX_INTERM_CVT_STEPS);
12087 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12089 intermediate_mode = insn_data[icode1].operand[0].mode;
12090 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12091 intermediate_type
12092 = vect_double_mask_nunits (prev_type, intermediate_mode);
12093 else
12094 intermediate_type
12095 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12096 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12097 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12098 && intermediate_mode == prev_mode
12099 && SCALAR_INT_MODE_P (prev_mode))
12100 interm_optab = vec_pack_sbool_trunc_optab;
12101 else
12102 interm_optab
12103 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12104 optab_default);
12105 if (!interm_optab
12106 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12107 || insn_data[icode1].operand[0].mode != intermediate_mode
12108 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12109 == CODE_FOR_nothing))
12110 break;
12112 interm_types->quick_push (intermediate_type);
12113 (*multi_step_cvt)++;
12115 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12117 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12118 return true;
12119 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12120 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12121 return true;
12124 prev_mode = intermediate_mode;
12125 prev_type = intermediate_type;
12126 optab1 = interm_optab;
12129 interm_types->release ();
12130 return false;
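
/* Illustrative arithmetic (not from GCC): the MULTI_STEP_CVT convention
   described in the comment above, counting only the *intermediate* types.
   Narrowing int (32 bits) to char (8 bits) via short takes two pack steps
   but records MULTI_STEP_CVT == 1; a single-step narrowing records 0.  */
#if 0
static int
multi_step_cvt_model (unsigned src_bits, unsigned dst_bits)
{
  if (src_bits <= dst_bits)
    return 0;			/* Not a narrowing.  */
  int halvings = 0;
  while (src_bits > dst_bits)
    {
      src_bits /= 2;
      halvings++;
    }
  return halvings - 1;		/* 32 -> 8: two halvings, one interm. type.  */
}
#endif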
12133 /* Generate and return a vector mask of MASK_TYPE such that
12134 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12135 Add the statements to SEQ. */
12137 tree
12138 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12139 tree end_index, const char *name)
12141 tree cmp_type = TREE_TYPE (start_index);
12142 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12143 cmp_type, mask_type,
12144 OPTIMIZE_FOR_SPEED));
12145 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12146 start_index, end_index,
12147 build_zero_cst (mask_type));
12148 tree tmp;
12149 if (name)
12150 tmp = make_temp_ssa_name (mask_type, NULL, name);
12151 else
12152 tmp = make_ssa_name (mask_type);
12153 gimple_call_set_lhs (call, tmp);
12154 gimple_seq_add_stmt (seq, call);
12155 return tmp;
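
/* Illustrative sketch (not from GCC): the lane-by-lane meaning of the
   IFN_WHILE_ULT mask built above.  Because the condition is monotone in I,
   "J + START_INDEX < END_INDEX for all J <= I" is simply
   START_INDEX + I < END_INDEX.  NLANES is hypothetical here; in the
   vectorizer it comes from the number of elements of MASK_TYPE.  */
#if 0
static void
while_ult_model (unsigned long start_index, unsigned long end_index,
		 unsigned char *mask, unsigned nlanes)
{
  for (unsigned i = 0; i < nlanes; i++)
    mask[i] = start_index + i < end_index;
}
#endif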
12158 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12159 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12161 tree
12162 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12163 tree end_index)
12165 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12166 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12169 /* Try to compute the vector types required to vectorize STMT_INFO,
12170 returning true on success and false if vectorization isn't possible.
12171 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12172 make sure that the number of elements in the vectors is no bigger
12173 than GROUP_SIZE.
12175 On success:
12177 - Set *STMT_VECTYPE_OUT to:
12178 - NULL_TREE if the statement doesn't need to be vectorized;
12179 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12181 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12182 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12183 statement does not help to determine the overall number of units. */
12185 opt_result
12186 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12187 tree *stmt_vectype_out,
12188 tree *nunits_vectype_out,
12189 unsigned int group_size)
12191 gimple *stmt = stmt_info->stmt;
12193 /* For BB vectorization, we should always have a group size once we've
12194 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12195 are tentative requests during things like early data reference
12196 analysis and pattern recognition. */
12197 if (is_a <bb_vec_info> (vinfo))
12198 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12199 else
12200 group_size = 0;
12202 *stmt_vectype_out = NULL_TREE;
12203 *nunits_vectype_out = NULL_TREE;
12205 if (gimple_get_lhs (stmt) == NULL_TREE
12206 /* MASK_STORE has no lhs, but is ok. */
12207 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12209 if (is_a <gcall *> (stmt))
12211 /* Ignore calls with no lhs. These must be calls to
12212 #pragma omp simd functions, and what vectorization factor
12213 it really needs can't be determined until
12214 vectorizable_simd_clone_call. */
12215 if (dump_enabled_p ())
12216 dump_printf_loc (MSG_NOTE, vect_location,
12217 "defer to SIMD clone analysis.\n");
12218 return opt_result::success ();
12221 return opt_result::failure_at (stmt,
12222 "not vectorized: irregular stmt.%G", stmt);
12225 tree vectype;
12226 tree scalar_type = NULL_TREE;
12227 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12229 vectype = STMT_VINFO_VECTYPE (stmt_info);
12230 if (dump_enabled_p ())
12231 dump_printf_loc (MSG_NOTE, vect_location,
12232 "precomputed vectype: %T\n", vectype);
12234 else if (vect_use_mask_type_p (stmt_info))
12236 unsigned int precision = stmt_info->mask_precision;
12237 scalar_type = build_nonstandard_integer_type (precision, 1);
12238 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12239 if (!vectype)
12240 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12241 " data-type %T\n", scalar_type);
12242 if (dump_enabled_p ())
12243 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12245 else
12247 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12248 scalar_type = TREE_TYPE (DR_REF (dr));
12249 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12250 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12251 else
12252 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12254 if (dump_enabled_p ())
12256 if (group_size)
12257 dump_printf_loc (MSG_NOTE, vect_location,
12258 "get vectype for scalar type (group size %d):"
12259 " %T\n", group_size, scalar_type);
12260 else
12261 dump_printf_loc (MSG_NOTE, vect_location,
12262 "get vectype for scalar type: %T\n", scalar_type);
12264 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12265 if (!vectype)
12266 return opt_result::failure_at (stmt,
12267 "not vectorized:"
12268 " unsupported data-type %T\n",
12269 scalar_type);
12271 if (dump_enabled_p ())
12272 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12275 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12276 return opt_result::failure_at (stmt,
12277 "not vectorized: vector stmt in loop:%G",
12278 stmt);
12280 *stmt_vectype_out = vectype;
12282 /* Don't try to compute scalar types if the stmt produces a boolean
12283 vector; use the existing vector type instead. */
12284 tree nunits_vectype = vectype;
12285 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12287 /* The number of units is set according to the smallest scalar
12288 type (or the largest vector size, but we only support one
12289 vector size per vectorization). */
12290 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12291 TREE_TYPE (vectype));
12292 if (scalar_type != TREE_TYPE (vectype))
12294 if (dump_enabled_p ())
12295 dump_printf_loc (MSG_NOTE, vect_location,
12296 "get vectype for smallest scalar type: %T\n",
12297 scalar_type);
12298 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12299 group_size);
12300 if (!nunits_vectype)
12301 return opt_result::failure_at
12302 (stmt, "not vectorized: unsupported data-type %T\n",
12303 scalar_type);
12304 if (dump_enabled_p ())
12305 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12306 nunits_vectype);
12310 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12311 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12312 return opt_result::failure_at (stmt,
12313 "Not vectorized: Incompatible number "
12314 "of vector subparts between %T and %T\n",
12315 nunits_vectype, *stmt_vectype_out);
12317 if (dump_enabled_p ())
12319 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12320 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12321 dump_printf (MSG_NOTE, "\n");
12324 *nunits_vectype_out = nunits_vectype;
12325 return opt_result::success ();
12328 /* Generate and return statement sequence that sets vector length LEN that is:
12330 min_of_start_and_end = min (START_INDEX, END_INDEX);
12331 left_len = END_INDEX - min_of_start_and_end;
12332 rhs = min (left_len, LEN_LIMIT);
12333 LEN = rhs;
12335 Note: the cost of the code generated by this function is modeled
12336 by vect_estimate_min_profitable_iters, so changes here may need
12337 corresponding changes there. */
12339 gimple_seq
12340 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12342 gimple_seq stmts = NULL;
12343 tree len_type = TREE_TYPE (len);
12344 gcc_assert (TREE_TYPE (start_index) == len_type);
12346 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12347 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12348 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12349 gimple* stmt = gimple_build_assign (len, rhs);
12350 gimple_seq_add_stmt (&stmts, stmt);
12352 return stmts;
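
/* Illustrative scalar model (not from GCC) of the length computation that
   vect_gen_len emits, following the pseudocode in its comment; unsigned
   arithmetic is assumed, as in the generated MIN/MINUS sequence.  */
#if 0
static unsigned long
vect_gen_len_model (unsigned long start_index, unsigned long end_index,
		    unsigned long len_limit)
{
  unsigned long min_of_start_and_end
    = start_index < end_index ? start_index : end_index;
  unsigned long left_len = end_index - min_of_start_and_end;
  return left_len < len_limit ? left_len : len_limit;
}
#endif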