tree-optimization/101467 - fix make_temp_ssa_name usage
[official-gcc.git] / gcc / tree-vect-stmts.c
blob0ef46962618b5ae8da6163593f023a6b49a4a454
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
158 tree array_ref;
159 gimple *new_stmt;
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 tree mem_ref;
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
249 worklist->safe_push (stmt_info);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
261 tree op;
262 ssa_op_iter iter;
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
347 *live_p = true;
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
361 return (*live_p || *relevant);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
373 tree operand;
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
381 /* STMT has a data_ref. FORNOW this means that its of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
413 return false;
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
422 if (operand == use)
423 return true;
425 return false;
430 Function process_use.
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT cause it had already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
474 if (!dstmt_vinfo)
475 return opt_result::success ();
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
654 use_operand_p use_p;
655 ssa_op_iter iter;
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
706 default:
707 break;
710 if (is_pattern_stmt_p (stmt_vinfo))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
734 for (; i < gimple_num_ops (assign); i++)
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
749 for (i = 0; i < gimple_call_num_args (call); i++)
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
781 if (fatal)
782 *fatal = false;
783 return res;
786 } /* while worklist */
788 return opt_result::success ();
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
806 int inside_cost = 0, prologue_cost = 0;
808 gcc_assert (cost_vec != NULL);
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand in to a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. */
841 static void
842 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
843 enum vect_def_type *dt,
844 unsigned int ncopies, int pwr,
845 stmt_vector_for_cost *cost_vec)
847 int i;
848 int inside_cost = 0, prologue_cost = 0;
850 for (i = 0; i < pwr + 1; i++)
852 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
853 stmt_info, 0, vect_body);
854 ncopies *= 2;
857 /* FORNOW: Assuming maximum 2 args per stmts. */
858 for (i = 0; i < 2; i++)
859 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
861 stmt_info, 0, vect_prologue);
863 if (dump_enabled_p ())
864 dump_printf_loc (MSG_NOTE, vect_location,
865 "vect_model_promotion_demotion_cost: inside_cost = %d, "
866 "prologue_cost = %d .\n", inside_cost, prologue_cost);
869 /* Returns true if the current function returns DECL. */
871 static bool
872 cfun_returns (tree decl)
874 edge_iterator ei;
875 edge e;
876 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
878 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
879 if (!ret)
880 continue;
881 if (gimple_return_retval (ret) == decl)
882 return true;
883 /* We often end up with an aggregate copy to the result decl,
884 handle that case as well. First skip intermediate clobbers
885 though. */
886 gimple *def = ret;
889 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
891 while (gimple_clobber_p (def));
892 if (is_a <gassign *> (def)
893 && gimple_assign_lhs (def) == gimple_return_retval (ret)
894 && gimple_assign_rhs1 (def) == decl)
895 return true;
897 return false;
900 /* Function vect_model_store_cost
902 Models cost for stores. In the case of grouped accesses, one access
903 has the overhead of the grouped access attributed to it. */
905 static void
906 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
907 vect_memory_access_type memory_access_type,
908 vec_load_store_type vls_type, slp_tree slp_node,
909 stmt_vector_for_cost *cost_vec)
911 unsigned int inside_cost = 0, prologue_cost = 0;
912 stmt_vec_info first_stmt_info = stmt_info;
913 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
915 /* ??? Somehow we need to fix this at the callers. */
916 if (slp_node)
917 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
919 if (vls_type == VLS_STORE_INVARIANT)
921 if (!slp_node)
922 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
923 stmt_info, 0, vect_prologue);
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node && grouped_access_p)
929 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p = (first_stmt_info == stmt_info);
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (first_stmt_p
941 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
943 /* Uses a high and low interleave or shuffle operations for each
944 needed permute. */
945 int group_size = DR_GROUP_SIZE (first_stmt_info);
946 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
948 stmt_info, 0, vect_body);
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: strided group_size = %d .\n",
953 group_size);
956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957 /* Costs of the stores. */
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_GATHER_SCATTER)
961 /* N scalar stores plus extracting the elements. */
962 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
963 inside_cost += record_stmt_cost (cost_vec,
964 ncopies * assumed_nunits,
965 scalar_store, stmt_info, 0, vect_body);
967 else
968 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
970 if (memory_access_type == VMAT_ELEMENTWISE
971 || memory_access_type == VMAT_STRIDED_SLP)
973 /* N scalar stores plus extracting the elements. */
974 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
975 inside_cost += record_stmt_cost (cost_vec,
976 ncopies * assumed_nunits,
977 vec_to_scalar, stmt_info, 0, vect_body);
980 /* When vectorizing a store into the function result assign
981 a penalty if the function returns in a multi-register location.
982 In this case we assume we'll end up with having to spill the
983 vector result and do piecewise loads as a conservative estimate. */
984 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
985 if (base
986 && (TREE_CODE (base) == RESULT_DECL
987 || (DECL_P (base) && cfun_returns (base)))
988 && !aggregate_value_p (base, cfun->decl))
990 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
991 /* ??? Handle PARALLEL in some way. */
992 if (REG_P (reg))
994 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
995 /* Assume that a single reg-reg move is possible and cheap,
996 do not account for vector to gp register move cost. */
997 if (nregs > 1)
999 /* Spill. */
1000 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1001 vector_store,
1002 stmt_info, 0, vect_epilogue);
1003 /* Loads. */
1004 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1005 scalar_load,
1006 stmt_info, 0, vect_epilogue);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: inside_cost = %d, "
1014 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1018 /* Calculate cost of DR's memory access. */
1019 void
1020 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1021 unsigned int *inside_cost,
1022 stmt_vector_for_cost *body_cost_vec)
1024 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1025 int alignment_support_scheme
1026 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1028 switch (alignment_support_scheme)
1030 case dr_aligned:
1032 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1033 vector_store, stmt_info, 0,
1034 vect_body);
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE, vect_location,
1038 "vect_model_store_cost: aligned.\n");
1039 break;
1042 case dr_unaligned_supported:
1044 /* Here, we assign an additional cost for the unaligned store. */
1045 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1046 unaligned_store, stmt_info,
1047 DR_MISALIGNMENT (dr_info),
1048 vect_body);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "vect_model_store_cost: unaligned supported by "
1052 "hardware.\n");
1053 break;
1056 case dr_unaligned_unsupported:
1058 *inside_cost = VECT_MAX_COST;
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1062 "vect_model_store_cost: unsupported access.\n");
1063 break;
1066 default:
1067 gcc_unreachable ();
1072 /* Function vect_model_load_cost
1074 Models cost for loads. In the case of grouped accesses, one access has
1075 the overhead of the grouped access attributed to it. Since unaligned
1076 accesses are supported for loads, we also account for the costs of the
1077 access scheme chosen. */
1079 static void
1080 vect_model_load_cost (vec_info *vinfo,
1081 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1082 vect_memory_access_type memory_access_type,
1083 slp_tree slp_node,
1084 stmt_vector_for_cost *cost_vec)
1086 unsigned int inside_cost = 0, prologue_cost = 0;
1087 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1089 gcc_assert (cost_vec);
1091 /* ??? Somehow we need to fix this at the callers. */
1092 if (slp_node)
1093 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1095 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1097 /* If the load is permuted then the alignment is determined by
1098 the first group element not by the first scalar stmt DR. */
1099 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1100 /* Record the cost for the permutation. */
1101 unsigned n_perms, n_loads;
1102 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1103 vf, true, &n_perms, &n_loads);
1104 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1105 first_stmt_info, 0, vect_body);
1107 /* And adjust the number of loads performed. This handles
1108 redundancies as well as loads that are later dead. */
1109 ncopies = n_loads;
1112 /* Grouped loads read all elements in the group at once,
1113 so we want the DR for the first statement. */
1114 stmt_vec_info first_stmt_info = stmt_info;
1115 if (!slp_node && grouped_access_p)
1116 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1118 /* True if we should include any once-per-group costs as well as
1119 the cost of the statement itself. For SLP we only get called
1120 once per group anyhow. */
1121 bool first_stmt_p = (first_stmt_info == stmt_info);
1123 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1124 ones we actually need. Account for the cost of unused results. */
1125 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1127 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1128 stmt_vec_info next_stmt_info = first_stmt_info;
1131 gaps -= 1;
1132 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1134 while (next_stmt_info);
1135 if (gaps)
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: %d unused vectors.\n",
1140 gaps);
1141 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps, false,
1142 &inside_cost, &prologue_cost,
1143 cost_vec, cost_vec, true);
1147 /* We assume that the cost of a single load-lanes instruction is
1148 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1149 access is instead being provided by a load-and-permute operation,
1150 include the cost of the permutes. */
1151 if (first_stmt_p
1152 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1154 /* Uses an even and odd extract operations or shuffle operations
1155 for each needed permute. */
1156 int group_size = DR_GROUP_SIZE (first_stmt_info);
1157 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1158 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1159 stmt_info, 0, vect_body);
1161 if (dump_enabled_p ())
1162 dump_printf_loc (MSG_NOTE, vect_location,
1163 "vect_model_load_cost: strided group_size = %d .\n",
1164 group_size);
1167 /* The loads themselves. */
1168 if (memory_access_type == VMAT_ELEMENTWISE
1169 || memory_access_type == VMAT_GATHER_SCATTER)
1171 /* N scalar loads plus gathering them into a vector. */
1172 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1173 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1174 inside_cost += record_stmt_cost (cost_vec,
1175 ncopies * assumed_nunits,
1176 scalar_load, stmt_info, 0, vect_body);
1178 else
1179 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1180 &inside_cost, &prologue_cost,
1181 cost_vec, cost_vec, true);
1182 if (memory_access_type == VMAT_ELEMENTWISE
1183 || memory_access_type == VMAT_STRIDED_SLP)
1184 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1185 stmt_info, 0, vect_body);
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: inside_cost = %d, "
1190 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1194 /* Calculate cost of DR's memory access. */
1195 void
1196 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1197 bool add_realign_cost, unsigned int *inside_cost,
1198 unsigned int *prologue_cost,
1199 stmt_vector_for_cost *prologue_cost_vec,
1200 stmt_vector_for_cost *body_cost_vec,
1201 bool record_prologue_costs)
1203 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1204 int alignment_support_scheme
1205 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1207 switch (alignment_support_scheme)
1209 case dr_aligned:
1211 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1212 stmt_info, 0, vect_body);
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_NOTE, vect_location,
1216 "vect_model_load_cost: aligned.\n");
1218 break;
1220 case dr_unaligned_supported:
1222 /* Here, we assign an additional cost for the unaligned load. */
1223 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1224 unaligned_load, stmt_info,
1225 DR_MISALIGNMENT (dr_info),
1226 vect_body);
1228 if (dump_enabled_p ())
1229 dump_printf_loc (MSG_NOTE, vect_location,
1230 "vect_model_load_cost: unaligned supported by "
1231 "hardware.\n");
1233 break;
1235 case dr_explicit_realign:
1237 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1238 vector_load, stmt_info, 0, vect_body);
1239 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1240 vec_perm, stmt_info, 0, vect_body);
1242 /* FIXME: If the misalignment remains fixed across the iterations of
1243 the containing loop, the following cost should be added to the
1244 prologue costs. */
1245 if (targetm.vectorize.builtin_mask_for_load)
1246 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1247 stmt_info, 0, vect_body);
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: explicit realign\n");
1253 break;
1255 case dr_explicit_realign_optimized:
1257 if (dump_enabled_p ())
1258 dump_printf_loc (MSG_NOTE, vect_location,
1259 "vect_model_load_cost: unaligned software "
1260 "pipelined.\n");
1262 /* Unaligned software pipeline has a load of an address, an initial
1263 load, and possibly a mask operation to "prime" the loop. However,
1264 if this is an access in a group of loads, which provide grouped
1265 access, then the above cost should only be considered for one
1266 access in the group. Inside the loop, there is a load op
1267 and a realignment op. */
1269 if (add_realign_cost && record_prologue_costs)
1271 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1272 vector_stmt, stmt_info,
1273 0, vect_prologue);
1274 if (targetm.vectorize.builtin_mask_for_load)
1275 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1276 vector_stmt, stmt_info,
1277 0, vect_prologue);
1280 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1281 stmt_info, 0, vect_body);
1282 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1283 stmt_info, 0, vect_body);
1285 if (dump_enabled_p ())
1286 dump_printf_loc (MSG_NOTE, vect_location,
1287 "vect_model_load_cost: explicit realign optimized"
1288 "\n");
1290 break;
1293 case dr_unaligned_unsupported:
1295 *inside_cost = VECT_MAX_COST;
1297 if (dump_enabled_p ())
1298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1299 "vect_model_load_cost: unsupported access.\n");
1300 break;
1303 default:
1304 gcc_unreachable ();
1308 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1309 the loop preheader for the vectorized stmt STMT_VINFO. */
1311 static void
1312 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1313 gimple_stmt_iterator *gsi)
1315 if (gsi)
1316 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1317 else
1318 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "created new init_stmt: %G", new_stmt);
1325 /* Function vect_init_vector.
1327 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1328 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1329 vector type a vector with all elements equal to VAL is created first.
1330 Place the initialization at GSI if it is not NULL. Otherwise, place the
1331 initialization at the loop preheader.
1332 Return the DEF of INIT_STMT.
1333 It will be used in the vectorization of STMT_INFO. */
1335 tree
1336 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1337 gimple_stmt_iterator *gsi)
1339 gimple *init_stmt;
1340 tree new_temp;
1342 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1343 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1345 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1346 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1348 /* Scalar boolean value should be transformed into
1349 all zeros or all ones value before building a vector. */
1350 if (VECTOR_BOOLEAN_TYPE_P (type))
1352 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1353 tree false_val = build_zero_cst (TREE_TYPE (type));
1355 if (CONSTANT_CLASS_P (val))
1356 val = integer_zerop (val) ? false_val : true_val;
1357 else
1359 new_temp = make_ssa_name (TREE_TYPE (type));
1360 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1361 val, true_val, false_val);
1362 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1363 val = new_temp;
1366 else
1368 gimple_seq stmts = NULL;
1369 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1370 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1371 TREE_TYPE (type), val);
1372 else
1373 /* ??? Condition vectorization expects us to do
1374 promotion of invariant/external defs. */
1375 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1376 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1377 !gsi_end_p (gsi2); )
1379 init_stmt = gsi_stmt (gsi2);
1380 gsi_remove (&gsi2, false);
1381 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1385 val = build_vector_from_val (type, val);
1388 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1389 init_stmt = gimple_build_assign (new_temp, val);
1390 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391 return new_temp;
1395 /* Function vect_get_vec_defs_for_operand.
1397 OP is an operand in STMT_VINFO. This function returns a vector of
1398 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1400 In the case that OP is an SSA_NAME which is defined in the loop, then
1401 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1403 In case OP is an invariant or constant, a new stmt that creates a vector def
1404 needs to be introduced. VECTYPE may be used to specify a required type for
1405 vector invariant. */
1407 void
1408 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1409 unsigned ncopies,
1410 tree op, vec<tree> *vec_oprnds, tree vectype)
1412 gimple *def_stmt;
1413 enum vect_def_type dt;
1414 bool is_simple_use;
1415 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1417 if (dump_enabled_p ())
1418 dump_printf_loc (MSG_NOTE, vect_location,
1419 "vect_get_vec_defs_for_operand: %T\n", op);
1421 stmt_vec_info def_stmt_info;
1422 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1423 &def_stmt_info, &def_stmt);
1424 gcc_assert (is_simple_use);
1425 if (def_stmt && dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1428 vec_oprnds->create (ncopies);
1429 if (dt == vect_constant_def || dt == vect_external_def)
1431 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1432 tree vector_type;
1434 if (vectype)
1435 vector_type = vectype;
1436 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1437 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1438 vector_type = truth_type_for (stmt_vectype);
1439 else
1440 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1442 gcc_assert (vector_type);
1443 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1444 while (ncopies--)
1445 vec_oprnds->quick_push (vop);
1447 else
1449 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1450 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1451 for (unsigned i = 0; i < ncopies; ++i)
1452 vec_oprnds->quick_push (gimple_get_lhs
1453 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1458 /* Get vectorized definitions for OP0 and OP1. */
1460 void
1461 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1462 unsigned ncopies,
1463 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1464 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1465 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1466 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1468 if (slp_node)
1470 if (op0)
1471 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1472 if (op1)
1473 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1474 if (op2)
1475 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1476 if (op3)
1477 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1479 else
1481 if (op0)
1482 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1483 op0, vec_oprnds0, vectype0);
1484 if (op1)
1485 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1486 op1, vec_oprnds1, vectype1);
1487 if (op2)
1488 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1489 op2, vec_oprnds2, vectype2);
1490 if (op3)
1491 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1492 op3, vec_oprnds3, vectype3);
1496 void
1497 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1498 unsigned ncopies,
1499 tree op0, vec<tree> *vec_oprnds0,
1500 tree op1, vec<tree> *vec_oprnds1,
1501 tree op2, vec<tree> *vec_oprnds2,
1502 tree op3, vec<tree> *vec_oprnds3)
1504 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1505 op0, vec_oprnds0, NULL_TREE,
1506 op1, vec_oprnds1, NULL_TREE,
1507 op2, vec_oprnds2, NULL_TREE,
1508 op3, vec_oprnds3, NULL_TREE);
1511 /* Helper function called by vect_finish_replace_stmt and
1512 vect_finish_stmt_generation. Set the location of the new
1513 statement and create and return a stmt_vec_info for it. */
1515 static void
1516 vect_finish_stmt_generation_1 (vec_info *,
1517 stmt_vec_info stmt_info, gimple *vec_stmt)
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1522 if (stmt_info)
1524 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1526 /* While EH edges will generally prevent vectorization, stmt might
1527 e.g. be in a must-not-throw region. Ensure newly created stmts
1528 that could throw are part of the same region. */
1529 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1530 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1531 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1533 else
1534 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1537 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1538 which sets the same scalar result as STMT_INFO did. Create and return a
1539 stmt_vec_info for VEC_STMT. */
1541 void
1542 vect_finish_replace_stmt (vec_info *vinfo,
1543 stmt_vec_info stmt_info, gimple *vec_stmt)
1545 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1546 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1548 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1549 gsi_replace (&gsi, vec_stmt, true);
1551 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1554 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1555 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1557 void
1558 vect_finish_stmt_generation (vec_info *vinfo,
1559 stmt_vec_info stmt_info, gimple *vec_stmt,
1560 gimple_stmt_iterator *gsi)
1562 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1564 if (!gsi_end_p (*gsi)
1565 && gimple_has_mem_ops (vec_stmt))
1567 gimple *at_stmt = gsi_stmt (*gsi);
1568 tree vuse = gimple_vuse (at_stmt);
1569 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1571 tree vdef = gimple_vdef (at_stmt);
1572 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1573 gimple_set_modified (vec_stmt, true);
1574 /* If we have an SSA vuse and insert a store, update virtual
1575 SSA form to avoid triggering the renamer. Do so only
1576 if we can easily see all uses - which is what almost always
1577 happens with the way vectorized stmts are inserted. */
1578 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1579 && ((is_gimple_assign (vec_stmt)
1580 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1581 || (is_gimple_call (vec_stmt)
1582 && !(gimple_call_flags (vec_stmt)
1583 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1585 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1586 gimple_set_vdef (vec_stmt, new_vdef);
1587 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1591 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1592 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1595 /* We want to vectorize a call to combined function CFN with function
1596 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1597 as the types of all inputs. Check whether this is possible using
1598 an internal function, returning its code if so or IFN_LAST if not. */
1600 static internal_fn
1601 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1602 tree vectype_out, tree vectype_in)
1604 internal_fn ifn;
1605 if (internal_fn_p (cfn))
1606 ifn = as_internal_fn (cfn);
1607 else
1608 ifn = associated_internal_fn (fndecl);
1609 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1611 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1612 if (info.vectorizable)
1614 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1615 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1616 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1617 OPTIMIZE_FOR_SPEED))
1618 return ifn;
1621 return IFN_LAST;
1625 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1626 gimple_stmt_iterator *);
1628 /* Check whether a load or store statement in the loop described by
1629 LOOP_VINFO is possible in a loop using partial vectors. This is
1630 testing whether the vectorizer pass has the appropriate support,
1631 as well as whether the target does.
1633 VLS_TYPE says whether the statement is a load or store and VECTYPE
1634 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1635 says how the load or store is going to be implemented and GROUP_SIZE
1636 is the number of load or store statements in the containing group.
1637 If the access is a gather load or scatter store, GS_INFO describes
1638 its arguments. If the load or store is conditional, SCALAR_MASK is the
1639 condition under which it occurs.
1641 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1642 vectors is not supported, otherwise record the required rgroup control
1643 types. */
1645 static void
1646 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1647 vec_load_store_type vls_type,
1648 int group_size,
1649 vect_memory_access_type
1650 memory_access_type,
1651 gather_scatter_info *gs_info,
1652 tree scalar_mask)
1654 /* Invariant loads need no special support. */
1655 if (memory_access_type == VMAT_INVARIANT)
1656 return;
1658 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1659 machine_mode vecmode = TYPE_MODE (vectype);
1660 bool is_load = (vls_type == VLS_LOAD);
1661 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1663 if (is_load
1664 ? !vect_load_lanes_supported (vectype, group_size, true)
1665 : !vect_store_lanes_supported (vectype, group_size, true))
1667 if (dump_enabled_p ())
1668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1669 "can't operate on partial vectors because"
1670 " the target doesn't have an appropriate"
1671 " load/store-lanes instruction.\n");
1672 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1673 return;
1675 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1676 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1677 return;
1680 if (memory_access_type == VMAT_GATHER_SCATTER)
1682 internal_fn ifn = (is_load
1683 ? IFN_MASK_GATHER_LOAD
1684 : IFN_MASK_SCATTER_STORE);
1685 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1686 gs_info->memory_type,
1687 gs_info->offset_vectype,
1688 gs_info->scale))
1690 if (dump_enabled_p ())
1691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1692 "can't operate on partial vectors because"
1693 " the target doesn't have an appropriate"
1694 " gather load or scatter store instruction.\n");
1695 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1696 return;
1698 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1699 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1700 return;
1703 if (memory_access_type != VMAT_CONTIGUOUS
1704 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1706 /* Element X of the data must come from iteration i * VF + X of the
1707 scalar loop. We need more work to support other mappings. */
1708 if (dump_enabled_p ())
1709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1710 "can't operate on partial vectors because an"
1711 " access isn't contiguous.\n");
1712 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1713 return;
1716 if (!VECTOR_MODE_P (vecmode))
1718 if (dump_enabled_p ())
1719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1720 "can't operate on partial vectors when emulating"
1721 " vector operations.\n");
1722 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1723 return;
1726 /* We might load more scalars than we need for permuting SLP loads.
1727 We checked in get_group_load_store_type that the extra elements
1728 don't leak into a new vector. */
1729 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1731 unsigned int nvectors;
1732 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1733 return nvectors;
1734 gcc_unreachable ();
1737 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1738 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1739 machine_mode mask_mode;
1740 bool using_partial_vectors_p = false;
1741 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1742 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1744 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1745 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1746 using_partial_vectors_p = true;
1749 machine_mode vmode;
1750 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1752 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1753 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1754 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1755 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1756 using_partial_vectors_p = true;
1759 if (!using_partial_vectors_p)
1761 if (dump_enabled_p ())
1762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1763 "can't operate on partial vectors because the"
1764 " target doesn't have the appropriate partial"
1765 " vectorization load or store.\n");
1766 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1770 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1771 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1772 that needs to be applied to all loads and stores in a vectorized loop.
1773 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1775 MASK_TYPE is the type of both masks. If new statements are needed,
1776 insert them before GSI. */
1778 static tree
1779 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1780 gimple_stmt_iterator *gsi)
1782 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1783 if (!loop_mask)
1784 return vec_mask;
1786 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1787 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1788 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1789 vec_mask, loop_mask);
1790 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1791 return and_res;
1794 /* Determine whether we can use a gather load or scatter store to vectorize
1795 strided load or store STMT_INFO by truncating the current offset to a
1796 smaller width. We need to be able to construct an offset vector:
1798 { 0, X, X*2, X*3, ... }
1800 without loss of precision, where X is STMT_INFO's DR_STEP.
1802 Return true if this is possible, describing the gather load or scatter
1803 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1805 static bool
1806 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1807 loop_vec_info loop_vinfo, bool masked_p,
1808 gather_scatter_info *gs_info)
1810 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1811 data_reference *dr = dr_info->dr;
1812 tree step = DR_STEP (dr);
1813 if (TREE_CODE (step) != INTEGER_CST)
1815 /* ??? Perhaps we could use range information here? */
1816 if (dump_enabled_p ())
1817 dump_printf_loc (MSG_NOTE, vect_location,
1818 "cannot truncate variable step.\n");
1819 return false;
1822 /* Get the number of bits in an element. */
1823 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1824 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1825 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1827 /* Set COUNT to the upper limit on the number of elements - 1.
1828 Start with the maximum vectorization factor. */
1829 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1831 /* Try lowering COUNT to the number of scalar latch iterations. */
1832 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1833 widest_int max_iters;
1834 if (max_loop_iterations (loop, &max_iters)
1835 && max_iters < count)
1836 count = max_iters.to_shwi ();
1838 /* Try scales of 1 and the element size. */
1839 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1840 wi::overflow_type overflow = wi::OVF_NONE;
1841 for (int i = 0; i < 2; ++i)
1843 int scale = scales[i];
1844 widest_int factor;
1845 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1846 continue;
1848 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1849 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1850 if (overflow)
1851 continue;
1852 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1853 unsigned int min_offset_bits = wi::min_precision (range, sign);
1855 /* Find the narrowest viable offset type. */
1856 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1857 tree offset_type = build_nonstandard_integer_type (offset_bits,
1858 sign == UNSIGNED);
1860 /* See whether the target supports the operation with an offset
1861 no narrower than OFFSET_TYPE. */
1862 tree memory_type = TREE_TYPE (DR_REF (dr));
1863 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1864 vectype, memory_type, offset_type, scale,
1865 &gs_info->ifn, &gs_info->offset_vectype))
1866 continue;
1868 gs_info->decl = NULL_TREE;
1869 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1870 but we don't need to store that here. */
1871 gs_info->base = NULL_TREE;
1872 gs_info->element_type = TREE_TYPE (vectype);
1873 gs_info->offset = fold_convert (offset_type, step);
1874 gs_info->offset_dt = vect_constant_def;
1875 gs_info->scale = scale;
1876 gs_info->memory_type = memory_type;
1877 return true;
1880 if (overflow && dump_enabled_p ())
1881 dump_printf_loc (MSG_NOTE, vect_location,
1882 "truncating gather/scatter offset to %d bits"
1883 " might change its value.\n", element_bits);
1885 return false;
1888 /* Return true if we can use gather/scatter internal functions to
1889 vectorize STMT_INFO, which is a grouped or strided load or store.
1890 MASKED_P is true if load or store is conditional. When returning
1891 true, fill in GS_INFO with the information required to perform the
1892 operation. */
1894 static bool
1895 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1896 loop_vec_info loop_vinfo, bool masked_p,
1897 gather_scatter_info *gs_info)
1899 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1900 || gs_info->decl)
1901 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1902 masked_p, gs_info);
1904 tree old_offset_type = TREE_TYPE (gs_info->offset);
1905 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1907 gcc_assert (TYPE_PRECISION (new_offset_type)
1908 >= TYPE_PRECISION (old_offset_type));
1909 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1911 if (dump_enabled_p ())
1912 dump_printf_loc (MSG_NOTE, vect_location,
1913 "using gather/scatter for strided/grouped access,"
1914 " scale = %d\n", gs_info->scale);
1916 return true;
1919 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1920 elements with a known constant step. Return -1 if that step
1921 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1923 static int
1924 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1926 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1927 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1928 size_zero_node);
1931 /* If the target supports a permute mask that reverses the elements in
1932 a vector of type VECTYPE, return that mask, otherwise return null. */
1934 static tree
1935 perm_mask_for_reverse (tree vectype)
1937 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1939 /* The encoding has a single stepped pattern. */
1940 vec_perm_builder sel (nunits, 1, 3);
1941 for (int i = 0; i < 3; ++i)
1942 sel.quick_push (nunits - 1 - i);
1944 vec_perm_indices indices (sel, 1, nunits);
1945 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1946 return NULL_TREE;
1947 return vect_gen_perm_mask_checked (vectype, indices);
1950 /* A subroutine of get_load_store_type, with a subset of the same
1951 arguments. Handle the case where STMT_INFO is a load or store that
1952 accesses consecutive elements with a negative step. */
1954 static vect_memory_access_type
1955 get_negative_load_store_type (vec_info *vinfo,
1956 stmt_vec_info stmt_info, tree vectype,
1957 vec_load_store_type vls_type,
1958 unsigned int ncopies)
1960 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1961 dr_alignment_support alignment_support_scheme;
1963 if (ncopies > 1)
1965 if (dump_enabled_p ())
1966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1967 "multiple types with negative step.\n");
1968 return VMAT_ELEMENTWISE;
1971 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1972 dr_info, false);
1973 if (alignment_support_scheme != dr_aligned
1974 && alignment_support_scheme != dr_unaligned_supported)
1976 if (dump_enabled_p ())
1977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1978 "negative step but alignment required.\n");
1979 return VMAT_ELEMENTWISE;
1982 if (vls_type == VLS_STORE_INVARIANT)
1984 if (dump_enabled_p ())
1985 dump_printf_loc (MSG_NOTE, vect_location,
1986 "negative step with invariant source;"
1987 " no permute needed.\n");
1988 return VMAT_CONTIGUOUS_DOWN;
1991 if (!perm_mask_for_reverse (vectype))
1993 if (dump_enabled_p ())
1994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1995 "negative step and reversing not supported.\n");
1996 return VMAT_ELEMENTWISE;
1999 return VMAT_CONTIGUOUS_REVERSE;
2002 /* STMT_INFO is either a masked or unconditional store. Return the value
2003 being stored. */
2005 tree
2006 vect_get_store_rhs (stmt_vec_info stmt_info)
2008 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2010 gcc_assert (gimple_assign_single_p (assign));
2011 return gimple_assign_rhs1 (assign);
2013 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2015 internal_fn ifn = gimple_call_internal_fn (call);
2016 int index = internal_fn_stored_value_index (ifn);
2017 gcc_assert (index >= 0);
2018 return gimple_call_arg (call, index);
2020 gcc_unreachable ();
2023 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2025 This function returns a vector type which can be composed with NETLS pieces,
2026 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2027 same vector size as the return vector. It checks target whether supports
2028 pieces-size vector mode for construction firstly, if target fails to, check
2029 pieces-size scalar mode for construction further. It returns NULL_TREE if
2030 fails to find the available composition.
2032 For example, for (vtype=V16QI, nelts=4), we can probably get:
2033 - V16QI with PTYPE V4QI.
2034 - V4SI with PTYPE SI.
2035 - NULL_TREE. */
2037 static tree
2038 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2040 gcc_assert (VECTOR_TYPE_P (vtype));
2041 gcc_assert (known_gt (nelts, 0U));
2043 machine_mode vmode = TYPE_MODE (vtype);
2044 if (!VECTOR_MODE_P (vmode))
2045 return NULL_TREE;
2047 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2048 unsigned int pbsize;
2049 if (constant_multiple_p (vbsize, nelts, &pbsize))
2051 /* First check if vec_init optab supports construction from
2052 vector pieces directly. */
2053 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2054 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2055 machine_mode rmode;
2056 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2057 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2058 != CODE_FOR_nothing))
2060 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2061 return vtype;
2064 /* Otherwise check if exists an integer type of the same piece size and
2065 if vec_init optab supports construction from it directly. */
2066 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2067 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2068 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2069 != CODE_FOR_nothing))
2071 *ptype = build_nonstandard_integer_type (pbsize, 1);
2072 return build_vector_type (*ptype, nelts);
2076 return NULL_TREE;
2079 /* A subroutine of get_load_store_type, with a subset of the same
2080 arguments. Handle the case where STMT_INFO is part of a grouped load
2081 or store.
2083 For stores, the statements in the group are all consecutive
2084 and there is no gap at the end. For loads, the statements in the
2085 group might not be consecutive; there can be gaps between statements
2086 as well as at the end. */
2088 static bool
2089 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2090 tree vectype, slp_tree slp_node,
2091 bool masked_p, vec_load_store_type vls_type,
2092 vect_memory_access_type *memory_access_type,
2093 dr_alignment_support *alignment_support_scheme,
2094 gather_scatter_info *gs_info)
2096 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2097 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2098 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2099 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2100 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2101 bool single_element_p = (stmt_info == first_stmt_info
2102 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2103 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2104 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2106 /* True if the vectorized statements would access beyond the last
2107 statement in the group. */
2108 bool overrun_p = false;
2110 /* True if we can cope with such overrun by peeling for gaps, so that
2111 there is at least one final scalar iteration after the vector loop. */
2112 bool can_overrun_p = (!masked_p
2113 && vls_type == VLS_LOAD
2114 && loop_vinfo
2115 && !loop->inner);
2117 /* There can only be a gap at the end of the group if the stride is
2118 known at compile time. */
2119 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2121 /* Stores can't yet have gaps. */
2122 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2124 if (slp_node)
2126 /* For SLP vectorization we directly vectorize a subchain
2127 without permutation. */
2128 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2129 first_dr_info
2130 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2131 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2133 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2134 separated by the stride, until we have a complete vector.
2135 Fall back to scalar accesses if that isn't possible. */
2136 if (multiple_p (nunits, group_size))
2137 *memory_access_type = VMAT_STRIDED_SLP;
2138 else
2139 *memory_access_type = VMAT_ELEMENTWISE;
2141 else
2143 overrun_p = loop_vinfo && gap != 0;
2144 if (overrun_p && vls_type != VLS_LOAD)
2146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2147 "Grouped store with gaps requires"
2148 " non-consecutive accesses\n");
2149 return false;
2151 /* An overrun is fine if the trailing elements are smaller
2152 than the alignment boundary B. Every vector access will
2153 be a multiple of B and so we are guaranteed to access a
2154 non-gap element in the same B-sized block. */
2155 if (overrun_p
2156 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2157 / vect_get_scalar_dr_size (first_dr_info)))
2158 overrun_p = false;
2160 /* If the gap splits the vector in half and the target
2161 can do half-vector operations avoid the epilogue peeling
2162 by simply loading half of the vector only. Usually
2163 the construction with an upper zero half will be elided. */
2164 dr_alignment_support alignment_support_scheme;
2165 tree half_vtype;
2166 if (overrun_p
2167 && !masked_p
2168 && (((alignment_support_scheme
2169 = vect_supportable_dr_alignment (vinfo,
2170 first_dr_info, false)))
2171 == dr_aligned
2172 || alignment_support_scheme == dr_unaligned_supported)
2173 && known_eq (nunits, (group_size - gap) * 2)
2174 && known_eq (nunits, group_size)
2175 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2176 != NULL_TREE))
2177 overrun_p = false;
2179 if (overrun_p && !can_overrun_p)
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "Peeling for outer loop is not supported\n");
2184 return false;
2186 int cmp = compare_step_with_zero (vinfo, stmt_info);
2187 if (cmp < 0)
2189 if (single_element_p)
2190 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2191 only correct for single element "interleaving" SLP. */
2192 *memory_access_type = get_negative_load_store_type
2193 (vinfo, stmt_info, vectype, vls_type, 1);
2194 else
2196 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2197 separated by the stride, until we have a complete vector.
2198 Fall back to scalar accesses if that isn't possible. */
2199 if (multiple_p (nunits, group_size))
2200 *memory_access_type = VMAT_STRIDED_SLP;
2201 else
2202 *memory_access_type = VMAT_ELEMENTWISE;
2205 else
2207 gcc_assert (!loop_vinfo || cmp > 0);
2208 *memory_access_type = VMAT_CONTIGUOUS;
2212 else
2214 /* We can always handle this case using elementwise accesses,
2215 but see if something more efficient is available. */
2216 *memory_access_type = VMAT_ELEMENTWISE;
2218 /* If there is a gap at the end of the group then these optimizations
2219 would access excess elements in the last iteration. */
2220 bool would_overrun_p = (gap != 0);
2221 /* An overrun is fine if the trailing elements are smaller than the
2222 alignment boundary B. Every vector access will be a multiple of B
2223 and so we are guaranteed to access a non-gap element in the
2224 same B-sized block. */
2225 if (would_overrun_p
2226 && !masked_p
2227 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2228 / vect_get_scalar_dr_size (first_dr_info)))
2229 would_overrun_p = false;
2231 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2232 && (can_overrun_p || !would_overrun_p)
2233 && compare_step_with_zero (vinfo, stmt_info) > 0)
2235 /* First cope with the degenerate case of a single-element
2236 vector. */
2237 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2240 /* Otherwise try using LOAD/STORE_LANES. */
2241 else if (vls_type == VLS_LOAD
2242 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2243 : vect_store_lanes_supported (vectype, group_size,
2244 masked_p))
2246 *memory_access_type = VMAT_LOAD_STORE_LANES;
2247 overrun_p = would_overrun_p;
2250 /* If that fails, try using permuting loads. */
2251 else if (vls_type == VLS_LOAD
2252 ? vect_grouped_load_supported (vectype, single_element_p,
2253 group_size)
2254 : vect_grouped_store_supported (vectype, group_size))
2256 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2257 overrun_p = would_overrun_p;
2261 /* As a last resort, trying using a gather load or scatter store.
2263 ??? Although the code can handle all group sizes correctly,
2264 it probably isn't a win to use separate strided accesses based
2265 on nearby locations. Or, even if it's a win over scalar code,
2266 it might not be a win over vectorizing at a lower VF, if that
2267 allows us to use contiguous accesses. */
2268 if (*memory_access_type == VMAT_ELEMENTWISE
2269 && single_element_p
2270 && loop_vinfo
2271 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2272 masked_p, gs_info))
2273 *memory_access_type = VMAT_GATHER_SCATTER;
2276 if (*memory_access_type == VMAT_GATHER_SCATTER
2277 || *memory_access_type == VMAT_ELEMENTWISE)
2278 *alignment_support_scheme = dr_unaligned_supported;
2279 else
2280 *alignment_support_scheme
2281 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
2283 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2285 /* STMT is the leader of the group. Check the operands of all the
2286 stmts of the group. */
2287 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2288 while (next_stmt_info)
2290 tree op = vect_get_store_rhs (next_stmt_info);
2291 enum vect_def_type dt;
2292 if (!vect_is_simple_use (op, vinfo, &dt))
2294 if (dump_enabled_p ())
2295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2296 "use not simple.\n");
2297 return false;
2299 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2303 if (overrun_p)
2305 gcc_assert (can_overrun_p);
2306 if (dump_enabled_p ())
2307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308 "Data access with gaps requires scalar "
2309 "epilogue loop\n");
2310 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2313 return true;
2316 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2317 if there is a memory access type that the vectorized form can use,
2318 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2319 or scatters, fill in GS_INFO accordingly. In addition
2320 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2321 the target does not support the alignment scheme.
2323 SLP says whether we're performing SLP rather than loop vectorization.
2324 MASKED_P is true if the statement is conditional on a vectorized mask.
2325 VECTYPE is the vector type that the vectorized statements will use.
2326 NCOPIES is the number of vector statements that will be needed. */
2328 static bool
2329 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2330 tree vectype, slp_tree slp_node,
2331 bool masked_p, vec_load_store_type vls_type,
2332 unsigned int ncopies,
2333 vect_memory_access_type *memory_access_type,
2334 dr_alignment_support *alignment_support_scheme,
2335 gather_scatter_info *gs_info)
2337 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2338 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2339 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2341 *memory_access_type = VMAT_GATHER_SCATTER;
2342 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2343 gcc_unreachable ();
2344 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2345 &gs_info->offset_dt,
2346 &gs_info->offset_vectype))
2348 if (dump_enabled_p ())
2349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2350 "%s index use not simple.\n",
2351 vls_type == VLS_LOAD ? "gather" : "scatter");
2352 return false;
2354 /* Gather-scatter accesses perform only component accesses, alignment
2355 is irrelevant for them. */
2356 *alignment_support_scheme = dr_unaligned_supported;
2358 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2360 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2361 masked_p,
2362 vls_type, memory_access_type,
2363 alignment_support_scheme, gs_info))
2364 return false;
2366 else if (STMT_VINFO_STRIDED_P (stmt_info))
2368 gcc_assert (!slp_node);
2369 if (loop_vinfo
2370 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2371 masked_p, gs_info))
2372 *memory_access_type = VMAT_GATHER_SCATTER;
2373 else
2374 *memory_access_type = VMAT_ELEMENTWISE;
2375 /* Alignment is irrelevant here. */
2376 *alignment_support_scheme = dr_unaligned_supported;
2378 else
2380 int cmp = compare_step_with_zero (vinfo, stmt_info);
2381 if (cmp == 0)
2383 gcc_assert (vls_type == VLS_LOAD);
2384 *memory_access_type = VMAT_INVARIANT;
2385 /* Invariant accesses perform only component accesses, alignment
2386 is irrelevant for them. */
2387 *alignment_support_scheme = dr_unaligned_supported;
2389 else
2391 if (cmp < 0)
2392 *memory_access_type = get_negative_load_store_type
2393 (vinfo, stmt_info, vectype, vls_type, ncopies);
2394 else
2395 *memory_access_type = VMAT_CONTIGUOUS;
2396 *alignment_support_scheme
2397 = vect_supportable_dr_alignment (vinfo,
2398 STMT_VINFO_DR_INFO (stmt_info),
2399 false);
2403 if ((*memory_access_type == VMAT_ELEMENTWISE
2404 || *memory_access_type == VMAT_STRIDED_SLP)
2405 && !nunits.is_constant ())
2407 if (dump_enabled_p ())
2408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2409 "Not using elementwise accesses due to variable "
2410 "vectorization factor.\n");
2411 return false;
2414 if (*alignment_support_scheme == dr_unaligned_unsupported)
2416 if (dump_enabled_p ())
2417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2418 "unsupported unaligned access\n");
2419 return false;
2422 /* FIXME: At the moment the cost model seems to underestimate the
2423 cost of using elementwise accesses. This check preserves the
2424 traditional behavior until that can be fixed. */
2425 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2426 if (!first_stmt_info)
2427 first_stmt_info = stmt_info;
2428 if (*memory_access_type == VMAT_ELEMENTWISE
2429 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2430 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2431 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2432 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2434 if (dump_enabled_p ())
2435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2436 "not falling back to elementwise accesses\n");
2437 return false;
2439 return true;
2442 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2443 conditional operation STMT_INFO. When returning true, store the mask
2444 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2445 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2446 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2448 static bool
2449 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2450 slp_tree slp_node, unsigned mask_index,
2451 tree *mask, slp_tree *mask_node,
2452 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2454 enum vect_def_type mask_dt;
2455 tree mask_vectype;
2456 slp_tree mask_node_1;
2457 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2458 mask, &mask_node_1, &mask_dt, &mask_vectype))
2460 if (dump_enabled_p ())
2461 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2462 "mask use not simple.\n");
2463 return false;
2466 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2468 if (dump_enabled_p ())
2469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2470 "mask argument is not a boolean.\n");
2471 return false;
2474 if (TREE_CODE (*mask) != SSA_NAME)
2476 if (dump_enabled_p ())
2477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2478 "mask argument is not an SSA name.\n");
2479 return false;
2482 /* If the caller is not prepared for adjusting an external/constant
2483 SLP mask vector type fail. */
2484 if (slp_node
2485 && !mask_node
2486 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2488 if (dump_enabled_p ())
2489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2490 "SLP mask argument is not vectorized.\n");
2491 return false;
2494 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2495 if (!mask_vectype)
2496 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2498 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2500 if (dump_enabled_p ())
2501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2502 "could not find an appropriate vector mask type.\n");
2503 return false;
2506 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2507 TYPE_VECTOR_SUBPARTS (vectype)))
2509 if (dump_enabled_p ())
2510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2511 "vector mask type %T"
2512 " does not match vector data type %T.\n",
2513 mask_vectype, vectype);
2515 return false;
2518 *mask_dt_out = mask_dt;
2519 *mask_vectype_out = mask_vectype;
2520 if (mask_node)
2521 *mask_node = mask_node_1;
2522 return true;
2525 /* Return true if stored value RHS is suitable for vectorizing store
2526 statement STMT_INFO. When returning true, store the type of the
2527 definition in *RHS_DT_OUT, the type of the vectorized store value in
2528 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2530 static bool
2531 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2532 slp_tree slp_node, tree rhs,
2533 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2534 vec_load_store_type *vls_type_out)
2536 /* In the case this is a store from a constant make sure
2537 native_encode_expr can handle it. */
2538 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2540 if (dump_enabled_p ())
2541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2542 "cannot encode constant as a byte sequence.\n");
2543 return false;
2546 unsigned op_no = 0;
2547 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2549 if (gimple_call_internal_p (call)
2550 && internal_store_fn_p (gimple_call_internal_fn (call)))
2551 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2554 enum vect_def_type rhs_dt;
2555 tree rhs_vectype;
2556 slp_tree slp_op;
2557 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2558 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2560 if (dump_enabled_p ())
2561 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2562 "use not simple.\n");
2563 return false;
2566 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2567 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2569 if (dump_enabled_p ())
2570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2571 "incompatible vector types.\n");
2572 return false;
2575 *rhs_dt_out = rhs_dt;
2576 *rhs_vectype_out = rhs_vectype;
2577 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2578 *vls_type_out = VLS_STORE_INVARIANT;
2579 else
2580 *vls_type_out = VLS_STORE;
2581 return true;
2584 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2585 Note that we support masks with floating-point type, in which case the
2586 floats are interpreted as a bitmask. */
2588 static tree
2589 vect_build_all_ones_mask (vec_info *vinfo,
2590 stmt_vec_info stmt_info, tree masktype)
2592 if (TREE_CODE (masktype) == INTEGER_TYPE)
2593 return build_int_cst (masktype, -1);
2594 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2596 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2597 mask = build_vector_from_val (masktype, mask);
2598 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2600 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2602 REAL_VALUE_TYPE r;
2603 long tmp[6];
2604 for (int j = 0; j < 6; ++j)
2605 tmp[j] = -1;
2606 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2607 tree mask = build_real (TREE_TYPE (masktype), r);
2608 mask = build_vector_from_val (masktype, mask);
2609 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2611 gcc_unreachable ();
2614 /* Build an all-zero merge value of type VECTYPE while vectorizing
2615 STMT_INFO as a gather load. */
2617 static tree
2618 vect_build_zero_merge_argument (vec_info *vinfo,
2619 stmt_vec_info stmt_info, tree vectype)
2621 tree merge;
2622 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2623 merge = build_int_cst (TREE_TYPE (vectype), 0);
2624 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2626 REAL_VALUE_TYPE r;
2627 long tmp[6];
2628 for (int j = 0; j < 6; ++j)
2629 tmp[j] = 0;
2630 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2631 merge = build_real (TREE_TYPE (vectype), r);
2633 else
2634 gcc_unreachable ();
2635 merge = build_vector_from_val (vectype, merge);
2636 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2639 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2640 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2641 the gather load operation. If the load is conditional, MASK is the
2642 unvectorized condition and MASK_DT is its definition type, otherwise
2643 MASK is null. */
2645 static void
2646 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2647 gimple_stmt_iterator *gsi,
2648 gimple **vec_stmt,
2649 gather_scatter_info *gs_info,
2650 tree mask)
2652 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2653 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2654 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2655 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2656 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2657 edge pe = loop_preheader_edge (loop);
2658 enum { NARROW, NONE, WIDEN } modifier;
2659 poly_uint64 gather_off_nunits
2660 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2662 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2663 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2664 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2665 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2666 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2667 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2668 tree scaletype = TREE_VALUE (arglist);
2669 tree real_masktype = masktype;
2670 gcc_checking_assert (types_compatible_p (srctype, rettype)
2671 && (!mask
2672 || TREE_CODE (masktype) == INTEGER_TYPE
2673 || types_compatible_p (srctype, masktype)));
2674 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2675 masktype = truth_type_for (srctype);
2677 tree mask_halftype = masktype;
2678 tree perm_mask = NULL_TREE;
2679 tree mask_perm_mask = NULL_TREE;
2680 if (known_eq (nunits, gather_off_nunits))
2681 modifier = NONE;
2682 else if (known_eq (nunits * 2, gather_off_nunits))
2684 modifier = WIDEN;
2686 /* Currently widening gathers and scatters are only supported for
2687 fixed-length vectors. */
2688 int count = gather_off_nunits.to_constant ();
2689 vec_perm_builder sel (count, count, 1);
2690 for (int i = 0; i < count; ++i)
2691 sel.quick_push (i | (count / 2));
2693 vec_perm_indices indices (sel, 1, count);
2694 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2695 indices);
2697 else if (known_eq (nunits, gather_off_nunits * 2))
2699 modifier = NARROW;
2701 /* Currently narrowing gathers and scatters are only supported for
2702 fixed-length vectors. */
2703 int count = nunits.to_constant ();
2704 vec_perm_builder sel (count, count, 1);
2705 sel.quick_grow (count);
2706 for (int i = 0; i < count; ++i)
2707 sel[i] = i < count / 2 ? i : i + count / 2;
2708 vec_perm_indices indices (sel, 2, count);
2709 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2711 ncopies *= 2;
2713 if (mask && masktype == real_masktype)
2715 for (int i = 0; i < count; ++i)
2716 sel[i] = i | (count / 2);
2717 indices.new_vector (sel, 2, count);
2718 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2720 else if (mask)
2721 mask_halftype = truth_type_for (gs_info->offset_vectype);
2723 else
2724 gcc_unreachable ();
2726 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2727 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2729 tree ptr = fold_convert (ptrtype, gs_info->base);
2730 if (!is_gimple_min_invariant (ptr))
2732 gimple_seq seq;
2733 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2734 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2735 gcc_assert (!new_bb);
2738 tree scale = build_int_cst (scaletype, gs_info->scale);
2740 tree vec_oprnd0 = NULL_TREE;
2741 tree vec_mask = NULL_TREE;
2742 tree src_op = NULL_TREE;
2743 tree mask_op = NULL_TREE;
2744 tree prev_res = NULL_TREE;
2746 if (!mask)
2748 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2749 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2752 auto_vec<tree> vec_oprnds0;
2753 auto_vec<tree> vec_masks;
2754 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2755 modifier == WIDEN ? ncopies / 2 : ncopies,
2756 gs_info->offset, &vec_oprnds0);
2757 if (mask)
2758 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2759 modifier == NARROW ? ncopies / 2 : ncopies,
2760 mask, &vec_masks);
2761 for (int j = 0; j < ncopies; ++j)
2763 tree op, var;
2764 if (modifier == WIDEN && (j & 1))
2765 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2766 perm_mask, stmt_info, gsi);
2767 else
2768 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2770 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2772 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2773 TYPE_VECTOR_SUBPARTS (idxtype)));
2774 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2775 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2776 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2777 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2778 op = var;
2781 if (mask)
2783 if (mask_perm_mask && (j & 1))
2784 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2785 mask_perm_mask, stmt_info, gsi);
2786 else
2788 if (modifier == NARROW)
2790 if ((j & 1) == 0)
2791 vec_mask = vec_masks[j / 2];
2793 else
2794 vec_mask = vec_masks[j];
2796 mask_op = vec_mask;
2797 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2799 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2800 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2801 gcc_assert (known_eq (sub1, sub2));
2802 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2803 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2804 gassign *new_stmt
2805 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2806 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2807 mask_op = var;
2810 if (modifier == NARROW && masktype != real_masktype)
2812 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2813 gassign *new_stmt
2814 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2815 : VEC_UNPACK_LO_EXPR,
2816 mask_op);
2817 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2818 mask_op = var;
2820 src_op = mask_op;
2823 tree mask_arg = mask_op;
2824 if (masktype != real_masktype)
2826 tree utype, optype = TREE_TYPE (mask_op);
2827 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2828 utype = real_masktype;
2829 else
2830 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2831 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2832 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2833 gassign *new_stmt
2834 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2835 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2836 mask_arg = var;
2837 if (!useless_type_conversion_p (real_masktype, utype))
2839 gcc_assert (TYPE_PRECISION (utype)
2840 <= TYPE_PRECISION (real_masktype));
2841 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2842 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2843 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2844 mask_arg = var;
2846 src_op = build_zero_cst (srctype);
2848 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2849 mask_arg, scale);
2851 if (!useless_type_conversion_p (vectype, rettype))
2853 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2854 TYPE_VECTOR_SUBPARTS (rettype)));
2855 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2856 gimple_call_set_lhs (new_stmt, op);
2857 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2858 var = make_ssa_name (vec_dest);
2859 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2860 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2861 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2863 else
2865 var = make_ssa_name (vec_dest, new_stmt);
2866 gimple_call_set_lhs (new_stmt, var);
2867 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2870 if (modifier == NARROW)
2872 if ((j & 1) == 0)
2874 prev_res = var;
2875 continue;
2877 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2878 stmt_info, gsi);
2879 new_stmt = SSA_NAME_DEF_STMT (var);
2882 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2884 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2887 /* Prepare the base and offset in GS_INFO for vectorization.
2888 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2889 to the vectorized offset argument for the first copy of STMT_INFO.
2890 STMT_INFO is the statement described by GS_INFO and LOOP is the
2891 containing loop. */
2893 static void
2894 vect_get_gather_scatter_ops (vec_info *vinfo,
2895 class loop *loop, stmt_vec_info stmt_info,
2896 gather_scatter_info *gs_info,
2897 tree *dataref_ptr, vec<tree> *vec_offset,
2898 unsigned ncopies)
2900 gimple_seq stmts = NULL;
2901 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2902 if (stmts != NULL)
2904 basic_block new_bb;
2905 edge pe = loop_preheader_edge (loop);
2906 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2907 gcc_assert (!new_bb);
2909 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
2910 vec_offset, gs_info->offset_vectype);
2913 /* Prepare to implement a grouped or strided load or store using
2914 the gather load or scatter store operation described by GS_INFO.
2915 STMT_INFO is the load or store statement.
2917 Set *DATAREF_BUMP to the amount that should be added to the base
2918 address after each copy of the vectorized statement. Set *VEC_OFFSET
2919 to an invariant offset vector in which element I has the value
2920 I * DR_STEP / SCALE. */
2922 static void
2923 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2924 loop_vec_info loop_vinfo,
2925 gather_scatter_info *gs_info,
2926 tree *dataref_bump, tree *vec_offset)
2928 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2929 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2931 tree bump = size_binop (MULT_EXPR,
2932 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2933 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2934 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2936 /* The offset given in GS_INFO can have pointer type, so use the element
2937 type of the vector instead. */
2938 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2940 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2941 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2942 ssize_int (gs_info->scale));
2943 step = fold_convert (offset_type, step);
2945 /* Create {0, X, X*2, X*3, ...}. */
2946 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2947 build_zero_cst (offset_type), step);
2948 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2951 /* Return the amount that should be added to a vector pointer to move
2952 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2953 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2954 vectorization. */
2956 static tree
2957 vect_get_data_ptr_increment (vec_info *vinfo,
2958 dr_vec_info *dr_info, tree aggr_type,
2959 vect_memory_access_type memory_access_type)
2961 if (memory_access_type == VMAT_INVARIANT)
2962 return size_zero_node;
2964 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2965 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2966 if (tree_int_cst_sgn (step) == -1)
2967 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2968 return iv_step;
2971 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2973 static bool
2974 vectorizable_bswap (vec_info *vinfo,
2975 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2976 gimple **vec_stmt, slp_tree slp_node,
2977 slp_tree *slp_op,
2978 tree vectype_in, stmt_vector_for_cost *cost_vec)
2980 tree op, vectype;
2981 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2982 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2983 unsigned ncopies;
2985 op = gimple_call_arg (stmt, 0);
2986 vectype = STMT_VINFO_VECTYPE (stmt_info);
2987 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2989 /* Multiple types in SLP are handled by creating the appropriate number of
2990 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2991 case of SLP. */
2992 if (slp_node)
2993 ncopies = 1;
2994 else
2995 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2997 gcc_assert (ncopies >= 1);
2999 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3000 if (! char_vectype)
3001 return false;
3003 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3004 unsigned word_bytes;
3005 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3006 return false;
3008 /* The encoding uses one stepped pattern for each byte in the word. */
3009 vec_perm_builder elts (num_bytes, word_bytes, 3);
3010 for (unsigned i = 0; i < 3; ++i)
3011 for (unsigned j = 0; j < word_bytes; ++j)
3012 elts.quick_push ((i + 1) * word_bytes - j - 1);
3014 vec_perm_indices indices (elts, 1, num_bytes);
3015 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3016 return false;
3018 if (! vec_stmt)
3020 if (slp_node
3021 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3023 if (dump_enabled_p ())
3024 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3025 "incompatible vector types for invariants\n");
3026 return false;
3029 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3030 DUMP_VECT_SCOPE ("vectorizable_bswap");
3031 record_stmt_cost (cost_vec,
3032 1, vector_stmt, stmt_info, 0, vect_prologue);
3033 record_stmt_cost (cost_vec,
3034 slp_node
3035 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3036 vec_perm, stmt_info, 0, vect_body);
3037 return true;
3040 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3042 /* Transform. */
3043 vec<tree> vec_oprnds = vNULL;
3044 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3045 op, &vec_oprnds);
3046 /* Arguments are ready. create the new vector stmt. */
3047 unsigned i;
3048 tree vop;
3049 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3051 gimple *new_stmt;
3052 tree tem = make_ssa_name (char_vectype);
3053 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3054 char_vectype, vop));
3055 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3056 tree tem2 = make_ssa_name (char_vectype);
3057 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3058 tem, tem, bswap_vconst);
3059 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3060 tem = make_ssa_name (vectype);
3061 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3062 vectype, tem2));
3063 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3064 if (slp_node)
3065 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3066 else
3067 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3070 if (!slp_node)
3071 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3073 vec_oprnds.release ();
3074 return true;
3077 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3078 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3079 in a single step. On success, store the binary pack code in
3080 *CONVERT_CODE. */
3082 static bool
3083 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3084 tree_code *convert_code)
3086 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3087 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3088 return false;
3090 tree_code code;
3091 int multi_step_cvt = 0;
3092 auto_vec <tree, 8> interm_types;
3093 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3094 &code, &multi_step_cvt, &interm_types)
3095 || multi_step_cvt)
3096 return false;
3098 *convert_code = code;
3099 return true;
3102 /* Function vectorizable_call.
3104 Check if STMT_INFO performs a function call that can be vectorized.
3105 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3106 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3107 Return true if STMT_INFO is vectorizable in this way. */
3109 static bool
3110 vectorizable_call (vec_info *vinfo,
3111 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3112 gimple **vec_stmt, slp_tree slp_node,
3113 stmt_vector_for_cost *cost_vec)
3115 gcall *stmt;
3116 tree vec_dest;
3117 tree scalar_dest;
3118 tree op;
3119 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3120 tree vectype_out, vectype_in;
3121 poly_uint64 nunits_in;
3122 poly_uint64 nunits_out;
3123 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3124 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3125 tree fndecl, new_temp, rhs_type;
3126 enum vect_def_type dt[4]
3127 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3128 vect_unknown_def_type };
3129 tree vectypes[ARRAY_SIZE (dt)] = {};
3130 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3131 int ndts = ARRAY_SIZE (dt);
3132 int ncopies, j;
3133 auto_vec<tree, 8> vargs;
3134 auto_vec<tree, 8> orig_vargs;
3135 enum { NARROW, NONE, WIDEN } modifier;
3136 size_t i, nargs;
3137 tree lhs;
3139 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3140 return false;
3142 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3143 && ! vec_stmt)
3144 return false;
3146 /* Is STMT_INFO a vectorizable call? */
3147 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3148 if (!stmt)
3149 return false;
3151 if (gimple_call_internal_p (stmt)
3152 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3153 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3154 /* Handled by vectorizable_load and vectorizable_store. */
3155 return false;
3157 if (gimple_call_lhs (stmt) == NULL_TREE
3158 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3159 return false;
3161 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3163 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3165 /* Process function arguments. */
3166 rhs_type = NULL_TREE;
3167 vectype_in = NULL_TREE;
3168 nargs = gimple_call_num_args (stmt);
3170 /* Bail out if the function has more than four arguments, we do not have
3171 interesting builtin functions to vectorize with more than two arguments
3172 except for fma. No arguments is also not good. */
3173 if (nargs == 0 || nargs > 4)
3174 return false;
3176 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3177 combined_fn cfn = gimple_call_combined_fn (stmt);
3178 if (cfn == CFN_GOMP_SIMD_LANE)
3180 nargs = 0;
3181 rhs_type = unsigned_type_node;
3184 int mask_opno = -1;
3185 if (internal_fn_p (cfn))
3186 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3188 for (i = 0; i < nargs; i++)
3190 if ((int) i == mask_opno)
3192 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3193 &op, &slp_op[i], &dt[i], &vectypes[i]))
3194 return false;
3195 continue;
3198 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3199 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3201 if (dump_enabled_p ())
3202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3203 "use not simple.\n");
3204 return false;
3207 /* We can only handle calls with arguments of the same type. */
3208 if (rhs_type
3209 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3211 if (dump_enabled_p ())
3212 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3213 "argument types differ.\n");
3214 return false;
3216 if (!rhs_type)
3217 rhs_type = TREE_TYPE (op);
3219 if (!vectype_in)
3220 vectype_in = vectypes[i];
3221 else if (vectypes[i]
3222 && !types_compatible_p (vectypes[i], vectype_in))
3224 if (dump_enabled_p ())
3225 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3226 "argument vector types differ.\n");
3227 return false;
3230 /* If all arguments are external or constant defs, infer the vector type
3231 from the scalar type. */
3232 if (!vectype_in)
3233 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3234 if (vec_stmt)
3235 gcc_assert (vectype_in);
3236 if (!vectype_in)
3238 if (dump_enabled_p ())
3239 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3240 "no vectype for scalar type %T\n", rhs_type);
3242 return false;
3244 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3245 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3246 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3247 by a pack of the two vectors into an SI vector. We would need
3248 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3249 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3251 if (dump_enabled_p ())
3252 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3253 "mismatched vector sizes %T and %T\n",
3254 vectype_in, vectype_out);
3255 return false;
3258 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3259 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3261 if (dump_enabled_p ())
3262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3263 "mixed mask and nonmask vector types\n");
3264 return false;
3267 /* FORNOW */
3268 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3269 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3270 if (known_eq (nunits_in * 2, nunits_out))
3271 modifier = NARROW;
3272 else if (known_eq (nunits_out, nunits_in))
3273 modifier = NONE;
3274 else if (known_eq (nunits_out * 2, nunits_in))
3275 modifier = WIDEN;
3276 else
3277 return false;
3279 /* We only handle functions that do not read or clobber memory. */
3280 if (gimple_vuse (stmt))
3282 if (dump_enabled_p ())
3283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3284 "function reads from or writes to memory.\n");
3285 return false;
3288 /* For now, we only vectorize functions if a target specific builtin
3289 is available. TODO -- in some cases, it might be profitable to
3290 insert the calls for pieces of the vector, in order to be able
3291 to vectorize other operations in the loop. */
3292 fndecl = NULL_TREE;
3293 internal_fn ifn = IFN_LAST;
3294 tree callee = gimple_call_fndecl (stmt);
3296 /* First try using an internal function. */
3297 tree_code convert_code = ERROR_MARK;
3298 if (cfn != CFN_LAST
3299 && (modifier == NONE
3300 || (modifier == NARROW
3301 && simple_integer_narrowing (vectype_out, vectype_in,
3302 &convert_code))))
3303 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3304 vectype_in);
3306 /* If that fails, try asking for a target-specific built-in function. */
3307 if (ifn == IFN_LAST)
3309 if (cfn != CFN_LAST)
3310 fndecl = targetm.vectorize.builtin_vectorized_function
3311 (cfn, vectype_out, vectype_in);
3312 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3313 fndecl = targetm.vectorize.builtin_md_vectorized_function
3314 (callee, vectype_out, vectype_in);
3317 if (ifn == IFN_LAST && !fndecl)
3319 if (cfn == CFN_GOMP_SIMD_LANE
3320 && !slp_node
3321 && loop_vinfo
3322 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3323 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3324 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3325 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3327 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3328 { 0, 1, 2, ... vf - 1 } vector. */
3329 gcc_assert (nargs == 0);
3331 else if (modifier == NONE
3332 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3333 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3334 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3335 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3336 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3337 slp_op, vectype_in, cost_vec);
3338 else
3340 if (dump_enabled_p ())
3341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3342 "function is not vectorizable.\n");
3343 return false;
3347 if (slp_node)
3348 ncopies = 1;
3349 else if (modifier == NARROW && ifn == IFN_LAST)
3350 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3351 else
3352 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3354 /* Sanity check: make sure that at least one copy of the vectorized stmt
3355 needs to be generated. */
3356 gcc_assert (ncopies >= 1);
3358 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3359 if (!vec_stmt) /* transformation not required. */
3361 if (slp_node)
3362 for (i = 0; i < nargs; ++i)
3363 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3365 if (dump_enabled_p ())
3366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3367 "incompatible vector types for invariants\n");
3368 return false;
3370 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3371 DUMP_VECT_SCOPE ("vectorizable_call");
3372 vect_model_simple_cost (vinfo, stmt_info,
3373 ncopies, dt, ndts, slp_node, cost_vec);
3374 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3375 record_stmt_cost (cost_vec, ncopies / 2,
3376 vec_promote_demote, stmt_info, 0, vect_body);
3378 if (loop_vinfo && mask_opno >= 0)
3380 unsigned int nvectors = (slp_node
3381 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3382 : ncopies);
3383 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3384 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3385 vectype_out, scalar_mask);
3387 return true;
3390 /* Transform. */
3392 if (dump_enabled_p ())
3393 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3395 /* Handle def. */
3396 scalar_dest = gimple_call_lhs (stmt);
3397 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3399 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3401 if (modifier == NONE || ifn != IFN_LAST)
3403 tree prev_res = NULL_TREE;
3404 vargs.safe_grow (nargs, true);
3405 orig_vargs.safe_grow (nargs, true);
3406 auto_vec<vec<tree> > vec_defs (nargs);
3407 for (j = 0; j < ncopies; ++j)
3409 /* Build argument list for the vectorized call. */
3410 if (slp_node)
3412 vec<tree> vec_oprnds0;
3414 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3415 vec_oprnds0 = vec_defs[0];
3417 /* Arguments are ready. Create the new vector stmt. */
3418 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3420 size_t k;
3421 for (k = 0; k < nargs; k++)
3423 vec<tree> vec_oprndsk = vec_defs[k];
3424 vargs[k] = vec_oprndsk[i];
3426 gimple *new_stmt;
3427 if (modifier == NARROW)
3429 /* We don't define any narrowing conditional functions
3430 at present. */
3431 gcc_assert (mask_opno < 0);
3432 tree half_res = make_ssa_name (vectype_in);
3433 gcall *call
3434 = gimple_build_call_internal_vec (ifn, vargs);
3435 gimple_call_set_lhs (call, half_res);
3436 gimple_call_set_nothrow (call, true);
3437 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3438 if ((i & 1) == 0)
3440 prev_res = half_res;
3441 continue;
3443 new_temp = make_ssa_name (vec_dest);
3444 new_stmt = gimple_build_assign (new_temp, convert_code,
3445 prev_res, half_res);
3446 vect_finish_stmt_generation (vinfo, stmt_info,
3447 new_stmt, gsi);
3449 else
3451 if (mask_opno >= 0 && masked_loop_p)
3453 unsigned int vec_num = vec_oprnds0.length ();
3454 /* Always true for SLP. */
3455 gcc_assert (ncopies == 1);
3456 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3457 vectype_out, i);
3458 vargs[mask_opno] = prepare_load_store_mask
3459 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3462 gcall *call;
3463 if (ifn != IFN_LAST)
3464 call = gimple_build_call_internal_vec (ifn, vargs);
3465 else
3466 call = gimple_build_call_vec (fndecl, vargs);
3467 new_temp = make_ssa_name (vec_dest, call);
3468 gimple_call_set_lhs (call, new_temp);
3469 gimple_call_set_nothrow (call, true);
3470 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3471 new_stmt = call;
3473 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3475 continue;
3478 for (i = 0; i < nargs; i++)
3480 op = gimple_call_arg (stmt, i);
3481 if (j == 0)
3483 vec_defs.quick_push (vNULL);
3484 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3485 op, &vec_defs[i],
3486 vectypes[i]);
3488 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3491 if (mask_opno >= 0 && masked_loop_p)
3493 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3494 vectype_out, j);
3495 vargs[mask_opno]
3496 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3497 vargs[mask_opno], gsi);
3500 gimple *new_stmt;
3501 if (cfn == CFN_GOMP_SIMD_LANE)
3503 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3504 tree new_var
3505 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3506 gimple *init_stmt = gimple_build_assign (new_var, cst);
3507 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3508 new_temp = make_ssa_name (vec_dest);
3509 new_stmt = gimple_build_assign (new_temp, new_var);
3510 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3512 else if (modifier == NARROW)
3514 /* We don't define any narrowing conditional functions at
3515 present. */
3516 gcc_assert (mask_opno < 0);
3517 tree half_res = make_ssa_name (vectype_in);
3518 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3519 gimple_call_set_lhs (call, half_res);
3520 gimple_call_set_nothrow (call, true);
3521 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3522 if ((j & 1) == 0)
3524 prev_res = half_res;
3525 continue;
3527 new_temp = make_ssa_name (vec_dest);
3528 new_stmt = gimple_build_assign (new_temp, convert_code,
3529 prev_res, half_res);
3530 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3532 else
3534 gcall *call;
3535 if (ifn != IFN_LAST)
3536 call = gimple_build_call_internal_vec (ifn, vargs);
3537 else
3538 call = gimple_build_call_vec (fndecl, vargs);
3539 new_temp = make_ssa_name (vec_dest, call);
3540 gimple_call_set_lhs (call, new_temp);
3541 gimple_call_set_nothrow (call, true);
3542 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3543 new_stmt = call;
3546 if (j == (modifier == NARROW ? 1 : 0))
3547 *vec_stmt = new_stmt;
3548 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3550 for (i = 0; i < nargs; i++)
3552 vec<tree> vec_oprndsi = vec_defs[i];
3553 vec_oprndsi.release ();
3556 else if (modifier == NARROW)
3558 auto_vec<vec<tree> > vec_defs (nargs);
3559 /* We don't define any narrowing conditional functions at present. */
3560 gcc_assert (mask_opno < 0);
3561 for (j = 0; j < ncopies; ++j)
3563 /* Build argument list for the vectorized call. */
3564 if (j == 0)
3565 vargs.create (nargs * 2);
3566 else
3567 vargs.truncate (0);
3569 if (slp_node)
3571 vec<tree> vec_oprnds0;
3573 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3574 vec_oprnds0 = vec_defs[0];
3576 /* Arguments are ready. Create the new vector stmt. */
3577 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3579 size_t k;
3580 vargs.truncate (0);
3581 for (k = 0; k < nargs; k++)
3583 vec<tree> vec_oprndsk = vec_defs[k];
3584 vargs.quick_push (vec_oprndsk[i]);
3585 vargs.quick_push (vec_oprndsk[i + 1]);
3587 gcall *call;
3588 if (ifn != IFN_LAST)
3589 call = gimple_build_call_internal_vec (ifn, vargs);
3590 else
3591 call = gimple_build_call_vec (fndecl, vargs);
3592 new_temp = make_ssa_name (vec_dest, call);
3593 gimple_call_set_lhs (call, new_temp);
3594 gimple_call_set_nothrow (call, true);
3595 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3596 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3598 continue;
3601 for (i = 0; i < nargs; i++)
3603 op = gimple_call_arg (stmt, i);
3604 if (j == 0)
3606 vec_defs.quick_push (vNULL);
3607 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3608 op, &vec_defs[i], vectypes[i]);
3610 vec_oprnd0 = vec_defs[i][2*j];
3611 vec_oprnd1 = vec_defs[i][2*j+1];
3613 vargs.quick_push (vec_oprnd0);
3614 vargs.quick_push (vec_oprnd1);
3617 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3618 new_temp = make_ssa_name (vec_dest, new_stmt);
3619 gimple_call_set_lhs (new_stmt, new_temp);
3620 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3622 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3625 if (!slp_node)
3626 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3628 for (i = 0; i < nargs; i++)
3630 vec<tree> vec_oprndsi = vec_defs[i];
3631 vec_oprndsi.release ();
3634 else
3635 /* No current target implements this case. */
3636 return false;
3638 vargs.release ();
3640 /* The call in STMT might prevent it from being removed in dce.
3641 We however cannot remove it here, due to the way the ssa name
3642 it defines is mapped to the new definition. So just replace
3643 rhs of the statement with something harmless. */
3645 if (slp_node)
3646 return true;
3648 stmt_info = vect_orig_stmt (stmt_info);
3649 lhs = gimple_get_lhs (stmt_info->stmt);
3651 gassign *new_stmt
3652 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3653 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3655 return true;
3659 struct simd_call_arg_info
3661 tree vectype;
3662 tree op;
3663 HOST_WIDE_INT linear_step;
3664 enum vect_def_type dt;
3665 unsigned int align;
3666 bool simd_lane_linear;
3669 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3670 is linear within simd lane (but not within whole loop), note it in
3671 *ARGINFO. */
3673 static void
3674 vect_simd_lane_linear (tree op, class loop *loop,
3675 struct simd_call_arg_info *arginfo)
3677 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3679 if (!is_gimple_assign (def_stmt)
3680 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3681 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3682 return;
3684 tree base = gimple_assign_rhs1 (def_stmt);
3685 HOST_WIDE_INT linear_step = 0;
3686 tree v = gimple_assign_rhs2 (def_stmt);
3687 while (TREE_CODE (v) == SSA_NAME)
3689 tree t;
3690 def_stmt = SSA_NAME_DEF_STMT (v);
3691 if (is_gimple_assign (def_stmt))
3692 switch (gimple_assign_rhs_code (def_stmt))
3694 case PLUS_EXPR:
3695 t = gimple_assign_rhs2 (def_stmt);
3696 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3697 return;
3698 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3699 v = gimple_assign_rhs1 (def_stmt);
3700 continue;
3701 case MULT_EXPR:
3702 t = gimple_assign_rhs2 (def_stmt);
3703 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3704 return;
3705 linear_step = tree_to_shwi (t);
3706 v = gimple_assign_rhs1 (def_stmt);
3707 continue;
3708 CASE_CONVERT:
3709 t = gimple_assign_rhs1 (def_stmt);
3710 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3711 || (TYPE_PRECISION (TREE_TYPE (v))
3712 < TYPE_PRECISION (TREE_TYPE (t))))
3713 return;
3714 if (!linear_step)
3715 linear_step = 1;
3716 v = t;
3717 continue;
3718 default:
3719 return;
3721 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3722 && loop->simduid
3723 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3724 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3725 == loop->simduid))
3727 if (!linear_step)
3728 linear_step = 1;
3729 arginfo->linear_step = linear_step;
3730 arginfo->op = base;
3731 arginfo->simd_lane_linear = true;
3732 return;
3737 /* Return the number of elements in vector type VECTYPE, which is associated
3738 with a SIMD clone. At present these vectors always have a constant
3739 length. */
3741 static unsigned HOST_WIDE_INT
3742 simd_clone_subparts (tree vectype)
3744 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3747 /* Function vectorizable_simd_clone_call.
3749 Check if STMT_INFO performs a function call that can be vectorized
3750 by calling a simd clone of the function.
3751 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3752 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3753 Return true if STMT_INFO is vectorizable in this way. */
3755 static bool
3756 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3757 gimple_stmt_iterator *gsi,
3758 gimple **vec_stmt, slp_tree slp_node,
3759 stmt_vector_for_cost *)
3761 tree vec_dest;
3762 tree scalar_dest;
3763 tree op, type;
3764 tree vec_oprnd0 = NULL_TREE;
3765 tree vectype;
3766 poly_uint64 nunits;
3767 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3768 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3769 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3770 tree fndecl, new_temp;
3771 int ncopies, j;
3772 auto_vec<simd_call_arg_info> arginfo;
3773 vec<tree> vargs = vNULL;
3774 size_t i, nargs;
3775 tree lhs, rtype, ratype;
3776 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3778 /* Is STMT a vectorizable call? */
3779 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3780 if (!stmt)
3781 return false;
3783 fndecl = gimple_call_fndecl (stmt);
3784 if (fndecl == NULL_TREE)
3785 return false;
3787 struct cgraph_node *node = cgraph_node::get (fndecl);
3788 if (node == NULL || node->simd_clones == NULL)
3789 return false;
3791 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3792 return false;
3794 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3795 && ! vec_stmt)
3796 return false;
3798 if (gimple_call_lhs (stmt)
3799 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3800 return false;
3802 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3804 vectype = STMT_VINFO_VECTYPE (stmt_info);
3806 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3807 return false;
3809 /* FORNOW */
3810 if (slp_node)
3811 return false;
3813 /* Process function arguments. */
3814 nargs = gimple_call_num_args (stmt);
3816 /* Bail out if the function has zero arguments. */
3817 if (nargs == 0)
3818 return false;
3820 arginfo.reserve (nargs, true);
3822 for (i = 0; i < nargs; i++)
3824 simd_call_arg_info thisarginfo;
3825 affine_iv iv;
3827 thisarginfo.linear_step = 0;
3828 thisarginfo.align = 0;
3829 thisarginfo.op = NULL_TREE;
3830 thisarginfo.simd_lane_linear = false;
3832 op = gimple_call_arg (stmt, i);
3833 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3834 &thisarginfo.vectype)
3835 || thisarginfo.dt == vect_uninitialized_def)
3837 if (dump_enabled_p ())
3838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3839 "use not simple.\n");
3840 return false;
3843 if (thisarginfo.dt == vect_constant_def
3844 || thisarginfo.dt == vect_external_def)
3845 gcc_assert (thisarginfo.vectype == NULL_TREE);
3846 else
3848 gcc_assert (thisarginfo.vectype != NULL_TREE);
3849 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3851 if (dump_enabled_p ())
3852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3853 "vector mask arguments are not supported\n");
3854 return false;
3858 /* For linear arguments, the analyze phase should have saved
3859 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3860 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3861 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3863 gcc_assert (vec_stmt);
3864 thisarginfo.linear_step
3865 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3866 thisarginfo.op
3867 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3868 thisarginfo.simd_lane_linear
3869 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3870 == boolean_true_node);
3871 /* If loop has been peeled for alignment, we need to adjust it. */
3872 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3873 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3874 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3876 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3877 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3878 tree opt = TREE_TYPE (thisarginfo.op);
3879 bias = fold_convert (TREE_TYPE (step), bias);
3880 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3881 thisarginfo.op
3882 = fold_build2 (POINTER_TYPE_P (opt)
3883 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3884 thisarginfo.op, bias);
3887 else if (!vec_stmt
3888 && thisarginfo.dt != vect_constant_def
3889 && thisarginfo.dt != vect_external_def
3890 && loop_vinfo
3891 && TREE_CODE (op) == SSA_NAME
3892 && simple_iv (loop, loop_containing_stmt (stmt), op,
3893 &iv, false)
3894 && tree_fits_shwi_p (iv.step))
3896 thisarginfo.linear_step = tree_to_shwi (iv.step);
3897 thisarginfo.op = iv.base;
3899 else if ((thisarginfo.dt == vect_constant_def
3900 || thisarginfo.dt == vect_external_def)
3901 && POINTER_TYPE_P (TREE_TYPE (op)))
3902 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3903 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3904 linear too. */
3905 if (POINTER_TYPE_P (TREE_TYPE (op))
3906 && !thisarginfo.linear_step
3907 && !vec_stmt
3908 && thisarginfo.dt != vect_constant_def
3909 && thisarginfo.dt != vect_external_def
3910 && loop_vinfo
3911 && !slp_node
3912 && TREE_CODE (op) == SSA_NAME)
3913 vect_simd_lane_linear (op, loop, &thisarginfo);
3915 arginfo.quick_push (thisarginfo);
3918 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3919 if (!vf.is_constant ())
3921 if (dump_enabled_p ())
3922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3923 "not considering SIMD clones; not yet supported"
3924 " for variable-width vectors.\n");
3925 return false;
3928 unsigned int badness = 0;
3929 struct cgraph_node *bestn = NULL;
3930 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3931 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3932 else
3933 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3934 n = n->simdclone->next_clone)
3936 unsigned int this_badness = 0;
3937 unsigned int num_calls;
3938 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
3939 || n->simdclone->nargs != nargs)
3940 continue;
3941 if (num_calls != 1)
3942 this_badness += exact_log2 (num_calls) * 4096;
3943 if (n->simdclone->inbranch)
3944 this_badness += 8192;
3945 int target_badness = targetm.simd_clone.usable (n);
3946 if (target_badness < 0)
3947 continue;
3948 this_badness += target_badness * 512;
3949 /* FORNOW: Have to add code to add the mask argument. */
3950 if (n->simdclone->inbranch)
3951 continue;
3952 for (i = 0; i < nargs; i++)
3954 switch (n->simdclone->args[i].arg_type)
3956 case SIMD_CLONE_ARG_TYPE_VECTOR:
3957 if (!useless_type_conversion_p
3958 (n->simdclone->args[i].orig_type,
3959 TREE_TYPE (gimple_call_arg (stmt, i))))
3960 i = -1;
3961 else if (arginfo[i].dt == vect_constant_def
3962 || arginfo[i].dt == vect_external_def
3963 || arginfo[i].linear_step)
3964 this_badness += 64;
3965 break;
3966 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3967 if (arginfo[i].dt != vect_constant_def
3968 && arginfo[i].dt != vect_external_def)
3969 i = -1;
3970 break;
3971 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3972 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3973 if (arginfo[i].dt == vect_constant_def
3974 || arginfo[i].dt == vect_external_def
3975 || (arginfo[i].linear_step
3976 != n->simdclone->args[i].linear_step))
3977 i = -1;
3978 break;
3979 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3980 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3981 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3982 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3983 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3984 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3985 /* FORNOW */
3986 i = -1;
3987 break;
3988 case SIMD_CLONE_ARG_TYPE_MASK:
3989 gcc_unreachable ();
3991 if (i == (size_t) -1)
3992 break;
3993 if (n->simdclone->args[i].alignment > arginfo[i].align)
3995 i = -1;
3996 break;
3998 if (arginfo[i].align)
3999 this_badness += (exact_log2 (arginfo[i].align)
4000 - exact_log2 (n->simdclone->args[i].alignment));
4002 if (i == (size_t) -1)
4003 continue;
4004 if (bestn == NULL || this_badness < badness)
4006 bestn = n;
4007 badness = this_badness;
4011 if (bestn == NULL)
4012 return false;
4014 for (i = 0; i < nargs; i++)
4015 if ((arginfo[i].dt == vect_constant_def
4016 || arginfo[i].dt == vect_external_def)
4017 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4019 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4020 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4021 slp_node);
4022 if (arginfo[i].vectype == NULL
4023 || !constant_multiple_p (bestn->simdclone->simdlen,
4024 simd_clone_subparts (arginfo[i].vectype)))
4025 return false;
4028 fndecl = bestn->decl;
4029 nunits = bestn->simdclone->simdlen;
4030 ncopies = vector_unroll_factor (vf, nunits);
4032 /* If the function isn't const, only allow it in simd loops where user
4033 has asserted that at least nunits consecutive iterations can be
4034 performed using SIMD instructions. */
4035 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4036 && gimple_vuse (stmt))
4037 return false;
4039 /* Sanity check: make sure that at least one copy of the vectorized stmt
4040 needs to be generated. */
4041 gcc_assert (ncopies >= 1);
4043 if (!vec_stmt) /* transformation not required. */
4045 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4046 for (i = 0; i < nargs; i++)
4047 if ((bestn->simdclone->args[i].arg_type
4048 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4049 || (bestn->simdclone->args[i].arg_type
4050 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4052 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4053 + 1,
4054 true);
4055 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4056 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4057 ? size_type_node : TREE_TYPE (arginfo[i].op);
4058 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4059 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4060 tree sll = arginfo[i].simd_lane_linear
4061 ? boolean_true_node : boolean_false_node;
4062 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4064 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4065 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4066 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4067 dt, slp_node, cost_vec); */
4068 return true;
4071 /* Transform. */
4073 if (dump_enabled_p ())
4074 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4076 /* Handle def. */
4077 scalar_dest = gimple_call_lhs (stmt);
4078 vec_dest = NULL_TREE;
4079 rtype = NULL_TREE;
4080 ratype = NULL_TREE;
4081 if (scalar_dest)
4083 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4084 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4085 if (TREE_CODE (rtype) == ARRAY_TYPE)
4087 ratype = rtype;
4088 rtype = TREE_TYPE (ratype);
4092 auto_vec<vec<tree> > vec_oprnds;
4093 auto_vec<unsigned> vec_oprnds_i;
4094 vec_oprnds.safe_grow_cleared (nargs, true);
4095 vec_oprnds_i.safe_grow_cleared (nargs, true);
4096 for (j = 0; j < ncopies; ++j)
4098 /* Build argument list for the vectorized call. */
4099 if (j == 0)
4100 vargs.create (nargs);
4101 else
4102 vargs.truncate (0);
4104 for (i = 0; i < nargs; i++)
4106 unsigned int k, l, m, o;
4107 tree atype;
4108 op = gimple_call_arg (stmt, i);
4109 switch (bestn->simdclone->args[i].arg_type)
4111 case SIMD_CLONE_ARG_TYPE_VECTOR:
4112 atype = bestn->simdclone->args[i].vector_type;
4113 o = vector_unroll_factor (nunits,
4114 simd_clone_subparts (atype));
4115 for (m = j * o; m < (j + 1) * o; m++)
4117 if (simd_clone_subparts (atype)
4118 < simd_clone_subparts (arginfo[i].vectype))
4120 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4121 k = (simd_clone_subparts (arginfo[i].vectype)
4122 / simd_clone_subparts (atype));
4123 gcc_assert ((k & (k - 1)) == 0);
4124 if (m == 0)
4126 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4127 ncopies * o / k, op,
4128 &vec_oprnds[i]);
4129 vec_oprnds_i[i] = 0;
4130 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4132 else
4134 vec_oprnd0 = arginfo[i].op;
4135 if ((m & (k - 1)) == 0)
4136 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4138 arginfo[i].op = vec_oprnd0;
4139 vec_oprnd0
4140 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4141 bitsize_int (prec),
4142 bitsize_int ((m & (k - 1)) * prec));
4143 gassign *new_stmt
4144 = gimple_build_assign (make_ssa_name (atype),
4145 vec_oprnd0);
4146 vect_finish_stmt_generation (vinfo, stmt_info,
4147 new_stmt, gsi);
4148 vargs.safe_push (gimple_assign_lhs (new_stmt));
4150 else
4152 k = (simd_clone_subparts (atype)
4153 / simd_clone_subparts (arginfo[i].vectype));
4154 gcc_assert ((k & (k - 1)) == 0);
4155 vec<constructor_elt, va_gc> *ctor_elts;
4156 if (k != 1)
4157 vec_alloc (ctor_elts, k);
4158 else
4159 ctor_elts = NULL;
4160 for (l = 0; l < k; l++)
4162 if (m == 0 && l == 0)
4164 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4165 k * o * ncopies,
4167 &vec_oprnds[i]);
4168 vec_oprnds_i[i] = 0;
4169 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4171 else
4172 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4173 arginfo[i].op = vec_oprnd0;
4174 if (k == 1)
4175 break;
4176 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4177 vec_oprnd0);
4179 if (k == 1)
4180 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4181 atype))
4183 vec_oprnd0
4184 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4185 gassign *new_stmt
4186 = gimple_build_assign (make_ssa_name (atype),
4187 vec_oprnd0);
4188 vect_finish_stmt_generation (vinfo, stmt_info,
4189 new_stmt, gsi);
4190 vargs.safe_push (gimple_assign_lhs (new_stmt));
4192 else
4193 vargs.safe_push (vec_oprnd0);
4194 else
4196 vec_oprnd0 = build_constructor (atype, ctor_elts);
4197 gassign *new_stmt
4198 = gimple_build_assign (make_ssa_name (atype),
4199 vec_oprnd0);
4200 vect_finish_stmt_generation (vinfo, stmt_info,
4201 new_stmt, gsi);
4202 vargs.safe_push (gimple_assign_lhs (new_stmt));
4206 break;
4207 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4208 vargs.safe_push (op);
4209 break;
4210 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4211 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4212 if (j == 0)
4214 gimple_seq stmts;
4215 arginfo[i].op
4216 = force_gimple_operand (unshare_expr (arginfo[i].op),
4217 &stmts, true, NULL_TREE);
4218 if (stmts != NULL)
4220 basic_block new_bb;
4221 edge pe = loop_preheader_edge (loop);
4222 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4223 gcc_assert (!new_bb);
4225 if (arginfo[i].simd_lane_linear)
4227 vargs.safe_push (arginfo[i].op);
4228 break;
4230 tree phi_res = copy_ssa_name (op);
4231 gphi *new_phi = create_phi_node (phi_res, loop->header);
4232 add_phi_arg (new_phi, arginfo[i].op,
4233 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4234 enum tree_code code
4235 = POINTER_TYPE_P (TREE_TYPE (op))
4236 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4237 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4238 ? sizetype : TREE_TYPE (op);
4239 poly_widest_int cst
4240 = wi::mul (bestn->simdclone->args[i].linear_step,
4241 ncopies * nunits);
4242 tree tcst = wide_int_to_tree (type, cst);
4243 tree phi_arg = copy_ssa_name (op);
4244 gassign *new_stmt
4245 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4246 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4247 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4248 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4249 UNKNOWN_LOCATION);
4250 arginfo[i].op = phi_res;
4251 vargs.safe_push (phi_res);
4253 else
4255 enum tree_code code
4256 = POINTER_TYPE_P (TREE_TYPE (op))
4257 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4258 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4259 ? sizetype : TREE_TYPE (op);
4260 poly_widest_int cst
4261 = wi::mul (bestn->simdclone->args[i].linear_step,
4262 j * nunits);
4263 tree tcst = wide_int_to_tree (type, cst);
4264 new_temp = make_ssa_name (TREE_TYPE (op));
4265 gassign *new_stmt
4266 = gimple_build_assign (new_temp, code,
4267 arginfo[i].op, tcst);
4268 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4269 vargs.safe_push (new_temp);
4271 break;
4272 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4273 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4274 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4275 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4276 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4277 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4278 default:
4279 gcc_unreachable ();
4283 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4284 if (vec_dest)
4286 gcc_assert (ratype
4287 || known_eq (simd_clone_subparts (rtype), nunits));
4288 if (ratype)
4289 new_temp = create_tmp_var (ratype);
4290 else if (useless_type_conversion_p (vectype, rtype))
4291 new_temp = make_ssa_name (vec_dest, new_call);
4292 else
4293 new_temp = make_ssa_name (rtype, new_call);
4294 gimple_call_set_lhs (new_call, new_temp);
4296 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4297 gimple *new_stmt = new_call;
4299 if (vec_dest)
4301 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4303 unsigned int k, l;
4304 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4305 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4306 k = vector_unroll_factor (nunits,
4307 simd_clone_subparts (vectype));
4308 gcc_assert ((k & (k - 1)) == 0);
4309 for (l = 0; l < k; l++)
4311 tree t;
4312 if (ratype)
4314 t = build_fold_addr_expr (new_temp);
4315 t = build2 (MEM_REF, vectype, t,
4316 build_int_cst (TREE_TYPE (t), l * bytes));
4318 else
4319 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4320 bitsize_int (prec), bitsize_int (l * prec));
4321 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4322 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4324 if (j == 0 && l == 0)
4325 *vec_stmt = new_stmt;
4326 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4329 if (ratype)
4330 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4331 continue;
4333 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4335 unsigned int k = (simd_clone_subparts (vectype)
4336 / simd_clone_subparts (rtype));
4337 gcc_assert ((k & (k - 1)) == 0);
4338 if ((j & (k - 1)) == 0)
4339 vec_alloc (ret_ctor_elts, k);
4340 if (ratype)
4342 unsigned int m, o;
4343 o = vector_unroll_factor (nunits,
4344 simd_clone_subparts (rtype));
4345 for (m = 0; m < o; m++)
4347 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4348 size_int (m), NULL_TREE, NULL_TREE);
4349 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4350 tem);
4351 vect_finish_stmt_generation (vinfo, stmt_info,
4352 new_stmt, gsi);
4353 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4354 gimple_assign_lhs (new_stmt));
4356 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4358 else
4359 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4360 if ((j & (k - 1)) != k - 1)
4361 continue;
4362 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4363 new_stmt
4364 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4365 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4367 if ((unsigned) j == k - 1)
4368 *vec_stmt = new_stmt;
4369 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4370 continue;
4372 else if (ratype)
4374 tree t = build_fold_addr_expr (new_temp);
4375 t = build2 (MEM_REF, vectype, t,
4376 build_int_cst (TREE_TYPE (t), 0));
4377 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4378 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4379 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4381 else if (!useless_type_conversion_p (vectype, rtype))
4383 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4384 new_stmt
4385 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4386 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4390 if (j == 0)
4391 *vec_stmt = new_stmt;
4392 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4395 for (i = 0; i < nargs; ++i)
4397 vec<tree> oprndsi = vec_oprnds[i];
4398 oprndsi.release ();
4400 vargs.release ();
4402 /* The call in STMT might prevent it from being removed in dce.
4403 We however cannot remove it here, due to the way the ssa name
4404 it defines is mapped to the new definition. So just replace
4405 rhs of the statement with something harmless. */
4407 if (slp_node)
4408 return true;
4410 gimple *new_stmt;
4411 if (scalar_dest)
4413 type = TREE_TYPE (scalar_dest);
4414 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4415 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4417 else
4418 new_stmt = gimple_build_nop ();
4419 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4420 unlink_stmt_vdef (stmt);
4422 return true;
4426 /* Function vect_gen_widened_results_half
4428 Create a vector stmt whose code, type, number of arguments, and result
4429 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4430 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4431 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4432 needs to be created (DECL is a function-decl of a target-builtin).
4433 STMT_INFO is the original scalar stmt that we are vectorizing. */
4435 static gimple *
4436 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4437 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4438 tree vec_dest, gimple_stmt_iterator *gsi,
4439 stmt_vec_info stmt_info)
4441 gimple *new_stmt;
4442 tree new_temp;
4444 /* Generate half of the widened result: */
4445 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4446 if (op_type != binary_op)
4447 vec_oprnd1 = NULL;
4448 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4449 new_temp = make_ssa_name (vec_dest, new_stmt);
4450 gimple_assign_set_lhs (new_stmt, new_temp);
4451 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4453 return new_stmt;
4457 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4458 For multi-step conversions store the resulting vectors and call the function
4459 recursively. */
4461 static void
4462 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4463 int multi_step_cvt,
4464 stmt_vec_info stmt_info,
4465 vec<tree> vec_dsts,
4466 gimple_stmt_iterator *gsi,
4467 slp_tree slp_node, enum tree_code code)
4469 unsigned int i;
4470 tree vop0, vop1, new_tmp, vec_dest;
4472 vec_dest = vec_dsts.pop ();
4474 for (i = 0; i < vec_oprnds->length (); i += 2)
4476 /* Create demotion operation. */
4477 vop0 = (*vec_oprnds)[i];
4478 vop1 = (*vec_oprnds)[i + 1];
4479 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4480 new_tmp = make_ssa_name (vec_dest, new_stmt);
4481 gimple_assign_set_lhs (new_stmt, new_tmp);
4482 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4484 if (multi_step_cvt)
4485 /* Store the resulting vector for next recursive call. */
4486 (*vec_oprnds)[i/2] = new_tmp;
4487 else
4489 /* This is the last step of the conversion sequence. Store the
4490 vectors in SLP_NODE or in vector info of the scalar statement
4491 (or in STMT_VINFO_RELATED_STMT chain). */
4492 if (slp_node)
4493 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4494 else
4495 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4499 /* For multi-step demotion operations we first generate demotion operations
4500 from the source type to the intermediate types, and then combine the
4501 results (stored in VEC_OPRNDS) in demotion operation to the destination
4502 type. */
4503 if (multi_step_cvt)
4505 /* At each level of recursion we have half of the operands we had at the
4506 previous level. */
4507 vec_oprnds->truncate ((i+1)/2);
4508 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4509 multi_step_cvt - 1,
4510 stmt_info, vec_dsts, gsi,
4511 slp_node, VEC_PACK_TRUNC_EXPR);
4514 vec_dsts.quick_push (vec_dest);
4518 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4519 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4520 STMT_INFO. For multi-step conversions store the resulting vectors and
4521 call the function recursively. */
4523 static void
4524 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4525 vec<tree> *vec_oprnds0,
4526 vec<tree> *vec_oprnds1,
4527 stmt_vec_info stmt_info, tree vec_dest,
4528 gimple_stmt_iterator *gsi,
4529 enum tree_code code1,
4530 enum tree_code code2, int op_type)
4532 int i;
4533 tree vop0, vop1, new_tmp1, new_tmp2;
4534 gimple *new_stmt1, *new_stmt2;
4535 vec<tree> vec_tmp = vNULL;
4537 vec_tmp.create (vec_oprnds0->length () * 2);
4538 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4540 if (op_type == binary_op)
4541 vop1 = (*vec_oprnds1)[i];
4542 else
4543 vop1 = NULL_TREE;
4545 /* Generate the two halves of promotion operation. */
4546 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4547 op_type, vec_dest, gsi,
4548 stmt_info);
4549 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4550 op_type, vec_dest, gsi,
4551 stmt_info);
4552 if (is_gimple_call (new_stmt1))
4554 new_tmp1 = gimple_call_lhs (new_stmt1);
4555 new_tmp2 = gimple_call_lhs (new_stmt2);
4557 else
4559 new_tmp1 = gimple_assign_lhs (new_stmt1);
4560 new_tmp2 = gimple_assign_lhs (new_stmt2);
4563 /* Store the results for the next step. */
4564 vec_tmp.quick_push (new_tmp1);
4565 vec_tmp.quick_push (new_tmp2);
4568 vec_oprnds0->release ();
4569 *vec_oprnds0 = vec_tmp;
4572 /* Create vectorized promotion stmts for widening stmts using only half the
4573 potential vector size for input. */
4574 static void
4575 vect_create_half_widening_stmts (vec_info *vinfo,
4576 vec<tree> *vec_oprnds0,
4577 vec<tree> *vec_oprnds1,
4578 stmt_vec_info stmt_info, tree vec_dest,
4579 gimple_stmt_iterator *gsi,
4580 enum tree_code code1,
4581 int op_type)
4583 int i;
4584 tree vop0, vop1;
4585 gimple *new_stmt1;
4586 gimple *new_stmt2;
4587 gimple *new_stmt3;
4588 vec<tree> vec_tmp = vNULL;
4590 vec_tmp.create (vec_oprnds0->length ());
4591 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4593 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4595 gcc_assert (op_type == binary_op);
4596 vop1 = (*vec_oprnds1)[i];
4598 /* Widen the first vector input. */
4599 out_type = TREE_TYPE (vec_dest);
4600 new_tmp1 = make_ssa_name (out_type);
4601 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4602 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4603 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4605 /* Widen the second vector input. */
4606 new_tmp2 = make_ssa_name (out_type);
4607 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4608 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4609 /* Perform the operation. With both vector inputs widened. */
4610 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4612 else
4614 /* Perform the operation. With the single vector input widened. */
4615 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4618 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4619 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4620 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4622 /* Store the results for the next step. */
4623 vec_tmp.quick_push (new_tmp3);
4626 vec_oprnds0->release ();
4627 *vec_oprnds0 = vec_tmp;
4631 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4632 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4633 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4634 Return true if STMT_INFO is vectorizable in this way. */
4636 static bool
4637 vectorizable_conversion (vec_info *vinfo,
4638 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4639 gimple **vec_stmt, slp_tree slp_node,
4640 stmt_vector_for_cost *cost_vec)
4642 tree vec_dest;
4643 tree scalar_dest;
4644 tree op0, op1 = NULL_TREE;
4645 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4646 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4647 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4648 tree new_temp;
4649 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4650 int ndts = 2;
4651 poly_uint64 nunits_in;
4652 poly_uint64 nunits_out;
4653 tree vectype_out, vectype_in;
4654 int ncopies, i;
4655 tree lhs_type, rhs_type;
4656 enum { NARROW, NONE, WIDEN } modifier;
4657 vec<tree> vec_oprnds0 = vNULL;
4658 vec<tree> vec_oprnds1 = vNULL;
4659 tree vop0;
4660 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4661 int multi_step_cvt = 0;
4662 vec<tree> interm_types = vNULL;
4663 tree intermediate_type, cvt_type = NULL_TREE;
4664 int op_type;
4665 unsigned short fltsz;
4667 /* Is STMT a vectorizable conversion? */
4669 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4670 return false;
4672 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4673 && ! vec_stmt)
4674 return false;
4676 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4677 if (!stmt)
4678 return false;
4680 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4681 return false;
4683 code = gimple_assign_rhs_code (stmt);
4684 if (!CONVERT_EXPR_CODE_P (code)
4685 && code != FIX_TRUNC_EXPR
4686 && code != FLOAT_EXPR
4687 && code != WIDEN_PLUS_EXPR
4688 && code != WIDEN_MINUS_EXPR
4689 && code != WIDEN_MULT_EXPR
4690 && code != WIDEN_LSHIFT_EXPR)
4691 return false;
4693 op_type = TREE_CODE_LENGTH (code);
4695 /* Check types of lhs and rhs. */
4696 scalar_dest = gimple_assign_lhs (stmt);
4697 lhs_type = TREE_TYPE (scalar_dest);
4698 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4700 /* Check the operands of the operation. */
4701 slp_tree slp_op0, slp_op1 = NULL;
4702 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4703 0, &op0, &slp_op0, &dt[0], &vectype_in))
4705 if (dump_enabled_p ())
4706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4707 "use not simple.\n");
4708 return false;
4711 rhs_type = TREE_TYPE (op0);
4712 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4713 && !((INTEGRAL_TYPE_P (lhs_type)
4714 && INTEGRAL_TYPE_P (rhs_type))
4715 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4716 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4717 return false;
4719 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4720 && ((INTEGRAL_TYPE_P (lhs_type)
4721 && !type_has_mode_precision_p (lhs_type))
4722 || (INTEGRAL_TYPE_P (rhs_type)
4723 && !type_has_mode_precision_p (rhs_type))))
4725 if (dump_enabled_p ())
4726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4727 "type conversion to/from bit-precision unsupported."
4728 "\n");
4729 return false;
4732 if (op_type == binary_op)
4734 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4735 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4737 op1 = gimple_assign_rhs2 (stmt);
4738 tree vectype1_in;
4739 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4740 &op1, &slp_op1, &dt[1], &vectype1_in))
4742 if (dump_enabled_p ())
4743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4744 "use not simple.\n");
4745 return false;
4747 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4748 OP1. */
4749 if (!vectype_in)
4750 vectype_in = vectype1_in;
4753 /* If op0 is an external or constant def, infer the vector type
4754 from the scalar type. */
4755 if (!vectype_in)
4756 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4757 if (vec_stmt)
4758 gcc_assert (vectype_in);
4759 if (!vectype_in)
4761 if (dump_enabled_p ())
4762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4763 "no vectype for scalar type %T\n", rhs_type);
4765 return false;
4768 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4769 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4771 if (dump_enabled_p ())
4772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4773 "can't convert between boolean and non "
4774 "boolean vectors %T\n", rhs_type);
4776 return false;
4779 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4780 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4781 if (known_eq (nunits_out, nunits_in))
4782 if (code == WIDEN_MINUS_EXPR
4783 || code == WIDEN_PLUS_EXPR
4784 || code == WIDEN_LSHIFT_EXPR
4785 || code == WIDEN_MULT_EXPR)
4786 modifier = WIDEN;
4787 else
4788 modifier = NONE;
4789 else if (multiple_p (nunits_out, nunits_in))
4790 modifier = NARROW;
4791 else
4793 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4794 modifier = WIDEN;
4797 /* Multiple types in SLP are handled by creating the appropriate number of
4798 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4799 case of SLP. */
4800 if (slp_node)
4801 ncopies = 1;
4802 else if (modifier == NARROW)
4803 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4804 else
4805 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4807 /* Sanity check: make sure that at least one copy of the vectorized stmt
4808 needs to be generated. */
4809 gcc_assert (ncopies >= 1);
4811 bool found_mode = false;
4812 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4813 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4814 opt_scalar_mode rhs_mode_iter;
4816 /* Supportable by target? */
4817 switch (modifier)
4819 case NONE:
4820 if (code != FIX_TRUNC_EXPR
4821 && code != FLOAT_EXPR
4822 && !CONVERT_EXPR_CODE_P (code))
4823 return false;
4824 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4825 break;
4826 /* FALLTHRU */
4827 unsupported:
4828 if (dump_enabled_p ())
4829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4830 "conversion not supported by target.\n");
4831 return false;
4833 case WIDEN:
4834 if (known_eq (nunits_in, nunits_out))
4836 if (!supportable_half_widening_operation (code, vectype_out,
4837 vectype_in, &code1))
4838 goto unsupported;
4839 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4840 break;
4842 if (supportable_widening_operation (vinfo, code, stmt_info,
4843 vectype_out, vectype_in, &code1,
4844 &code2, &multi_step_cvt,
4845 &interm_types))
4847 /* Binary widening operation can only be supported directly by the
4848 architecture. */
4849 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4850 break;
4853 if (code != FLOAT_EXPR
4854 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4855 goto unsupported;
4857 fltsz = GET_MODE_SIZE (lhs_mode);
4858 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4860 rhs_mode = rhs_mode_iter.require ();
4861 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4862 break;
4864 cvt_type
4865 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4866 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4867 if (cvt_type == NULL_TREE)
4868 goto unsupported;
4870 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4872 if (!supportable_convert_operation (code, vectype_out,
4873 cvt_type, &codecvt1))
4874 goto unsupported;
4876 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4877 vectype_out, cvt_type,
4878 &codecvt1, &codecvt2,
4879 &multi_step_cvt,
4880 &interm_types))
4881 continue;
4882 else
4883 gcc_assert (multi_step_cvt == 0);
4885 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4886 cvt_type,
4887 vectype_in, &code1, &code2,
4888 &multi_step_cvt, &interm_types))
4890 found_mode = true;
4891 break;
4895 if (!found_mode)
4896 goto unsupported;
4898 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4899 codecvt2 = ERROR_MARK;
4900 else
4902 multi_step_cvt++;
4903 interm_types.safe_push (cvt_type);
4904 cvt_type = NULL_TREE;
4906 break;
4908 case NARROW:
4909 gcc_assert (op_type == unary_op);
4910 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4911 &code1, &multi_step_cvt,
4912 &interm_types))
4913 break;
4915 if (code != FIX_TRUNC_EXPR
4916 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4917 goto unsupported;
4919 cvt_type
4920 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4921 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4922 if (cvt_type == NULL_TREE)
4923 goto unsupported;
4924 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4925 &codecvt1))
4926 goto unsupported;
4927 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4928 &code1, &multi_step_cvt,
4929 &interm_types))
4930 break;
4931 goto unsupported;
4933 default:
4934 gcc_unreachable ();
4937 if (!vec_stmt) /* transformation not required. */
4939 if (slp_node
4940 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4941 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4943 if (dump_enabled_p ())
4944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4945 "incompatible vector types for invariants\n");
4946 return false;
4948 DUMP_VECT_SCOPE ("vectorizable_conversion");
4949 if (modifier == NONE)
4951 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4952 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4953 cost_vec);
4955 else if (modifier == NARROW)
4957 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4958 /* The final packing step produces one vector result per copy. */
4959 unsigned int nvectors
4960 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4961 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4962 multi_step_cvt, cost_vec);
4964 else
4966 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4967 /* The initial unpacking step produces two vector results
4968 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4969 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4970 unsigned int nvectors
4971 = (slp_node
4972 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4973 : ncopies * 2);
4974 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4975 multi_step_cvt, cost_vec);
4977 interm_types.release ();
4978 return true;
4981 /* Transform. */
4982 if (dump_enabled_p ())
4983 dump_printf_loc (MSG_NOTE, vect_location,
4984 "transform conversion. ncopies = %d.\n", ncopies);
4986 if (op_type == binary_op)
4988 if (CONSTANT_CLASS_P (op0))
4989 op0 = fold_convert (TREE_TYPE (op1), op0);
4990 else if (CONSTANT_CLASS_P (op1))
4991 op1 = fold_convert (TREE_TYPE (op0), op1);
4994 /* In case of multi-step conversion, we first generate conversion operations
4995 to the intermediate types, and then from that types to the final one.
4996 We create vector destinations for the intermediate type (TYPES) received
4997 from supportable_*_operation, and store them in the correct order
4998 for future use in vect_create_vectorized_*_stmts (). */
4999 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5000 vec_dest = vect_create_destination_var (scalar_dest,
5001 (cvt_type && modifier == WIDEN)
5002 ? cvt_type : vectype_out);
5003 vec_dsts.quick_push (vec_dest);
5005 if (multi_step_cvt)
5007 for (i = interm_types.length () - 1;
5008 interm_types.iterate (i, &intermediate_type); i--)
5010 vec_dest = vect_create_destination_var (scalar_dest,
5011 intermediate_type);
5012 vec_dsts.quick_push (vec_dest);
5016 if (cvt_type)
5017 vec_dest = vect_create_destination_var (scalar_dest,
5018 modifier == WIDEN
5019 ? vectype_out : cvt_type);
5021 int ninputs = 1;
5022 if (!slp_node)
5024 if (modifier == WIDEN)
5026 else if (modifier == NARROW)
5028 if (multi_step_cvt)
5029 ninputs = vect_pow2 (multi_step_cvt);
5030 ninputs *= 2;
5034 switch (modifier)
5036 case NONE:
5037 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5038 op0, &vec_oprnds0);
5039 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5041 /* Arguments are ready, create the new vector stmt. */
5042 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5043 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5044 new_temp = make_ssa_name (vec_dest, new_stmt);
5045 gimple_assign_set_lhs (new_stmt, new_temp);
5046 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5048 if (slp_node)
5049 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5050 else
5051 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5053 break;
5055 case WIDEN:
5056 /* In case the vectorization factor (VF) is bigger than the number
5057 of elements that we can fit in a vectype (nunits), we have to
5058 generate more than one vector stmt - i.e - we need to "unroll"
5059 the vector stmt by a factor VF/nunits. */
5060 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5061 op0, &vec_oprnds0,
5062 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5063 &vec_oprnds1);
5064 if (code == WIDEN_LSHIFT_EXPR)
5066 int oprnds_size = vec_oprnds0.length ();
5067 vec_oprnds1.create (oprnds_size);
5068 for (i = 0; i < oprnds_size; ++i)
5069 vec_oprnds1.quick_push (op1);
5071 /* Arguments are ready. Create the new vector stmts. */
5072 for (i = multi_step_cvt; i >= 0; i--)
5074 tree this_dest = vec_dsts[i];
5075 enum tree_code c1 = code1, c2 = code2;
5076 if (i == 0 && codecvt2 != ERROR_MARK)
5078 c1 = codecvt1;
5079 c2 = codecvt2;
5081 if (known_eq (nunits_out, nunits_in))
5082 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5083 &vec_oprnds1, stmt_info,
5084 this_dest, gsi,
5085 c1, op_type);
5086 else
5087 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5088 &vec_oprnds1, stmt_info,
5089 this_dest, gsi,
5090 c1, c2, op_type);
5093 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5095 gimple *new_stmt;
5096 if (cvt_type)
5098 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5099 new_temp = make_ssa_name (vec_dest);
5100 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5101 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5103 else
5104 new_stmt = SSA_NAME_DEF_STMT (vop0);
5106 if (slp_node)
5107 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5108 else
5109 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5111 break;
5113 case NARROW:
5114 /* In case the vectorization factor (VF) is bigger than the number
5115 of elements that we can fit in a vectype (nunits), we have to
5116 generate more than one vector stmt - i.e - we need to "unroll"
5117 the vector stmt by a factor VF/nunits. */
5118 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5119 op0, &vec_oprnds0);
5120 /* Arguments are ready. Create the new vector stmts. */
5121 if (cvt_type)
5122 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5124 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5125 new_temp = make_ssa_name (vec_dest);
5126 gassign *new_stmt
5127 = gimple_build_assign (new_temp, codecvt1, vop0);
5128 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5129 vec_oprnds0[i] = new_temp;
5132 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5133 multi_step_cvt,
5134 stmt_info, vec_dsts, gsi,
5135 slp_node, code1);
5136 break;
5138 if (!slp_node)
5139 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5141 vec_oprnds0.release ();
5142 vec_oprnds1.release ();
5143 interm_types.release ();
5145 return true;
5148 /* Return true if we can assume from the scalar form of STMT_INFO that
5149 neither the scalar nor the vector forms will generate code. STMT_INFO
5150 is known not to involve a data reference. */
5152 bool
5153 vect_nop_conversion_p (stmt_vec_info stmt_info)
5155 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5156 if (!stmt)
5157 return false;
5159 tree lhs = gimple_assign_lhs (stmt);
5160 tree_code code = gimple_assign_rhs_code (stmt);
5161 tree rhs = gimple_assign_rhs1 (stmt);
5163 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5164 return true;
5166 if (CONVERT_EXPR_CODE_P (code))
5167 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5169 return false;
5172 /* Function vectorizable_assignment.
5174 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5175 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5176 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5177 Return true if STMT_INFO is vectorizable in this way. */
5179 static bool
5180 vectorizable_assignment (vec_info *vinfo,
5181 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5182 gimple **vec_stmt, slp_tree slp_node,
5183 stmt_vector_for_cost *cost_vec)
5185 tree vec_dest;
5186 tree scalar_dest;
5187 tree op;
5188 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5189 tree new_temp;
5190 enum vect_def_type dt[1] = {vect_unknown_def_type};
5191 int ndts = 1;
5192 int ncopies;
5193 int i;
5194 vec<tree> vec_oprnds = vNULL;
5195 tree vop;
5196 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5197 enum tree_code code;
5198 tree vectype_in;
5200 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5201 return false;
5203 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5204 && ! vec_stmt)
5205 return false;
5207 /* Is vectorizable assignment? */
5208 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5209 if (!stmt)
5210 return false;
5212 scalar_dest = gimple_assign_lhs (stmt);
5213 if (TREE_CODE (scalar_dest) != SSA_NAME)
5214 return false;
5216 if (STMT_VINFO_DATA_REF (stmt_info))
5217 return false;
5219 code = gimple_assign_rhs_code (stmt);
5220 if (!(gimple_assign_single_p (stmt)
5221 || code == PAREN_EXPR
5222 || CONVERT_EXPR_CODE_P (code)))
5223 return false;
5225 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5226 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5228 /* Multiple types in SLP are handled by creating the appropriate number of
5229 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5230 case of SLP. */
5231 if (slp_node)
5232 ncopies = 1;
5233 else
5234 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5236 gcc_assert (ncopies >= 1);
5238 slp_tree slp_op;
5239 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5240 &dt[0], &vectype_in))
5242 if (dump_enabled_p ())
5243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5244 "use not simple.\n");
5245 return false;
5247 if (!vectype_in)
5248 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5250 /* We can handle NOP_EXPR conversions that do not change the number
5251 of elements or the vector size. */
5252 if ((CONVERT_EXPR_CODE_P (code)
5253 || code == VIEW_CONVERT_EXPR)
5254 && (!vectype_in
5255 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5256 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5257 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5258 return false;
5260 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5261 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5263 if (dump_enabled_p ())
5264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5265 "can't convert between boolean and non "
5266 "boolean vectors %T\n", TREE_TYPE (op));
5268 return false;
5271 /* We do not handle bit-precision changes. */
5272 if ((CONVERT_EXPR_CODE_P (code)
5273 || code == VIEW_CONVERT_EXPR)
5274 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5275 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5276 || !type_has_mode_precision_p (TREE_TYPE (op)))
5277 /* But a conversion that does not change the bit-pattern is ok. */
5278 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5279 > TYPE_PRECISION (TREE_TYPE (op)))
5280 && TYPE_UNSIGNED (TREE_TYPE (op))))
5282 if (dump_enabled_p ())
5283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5284 "type conversion to/from bit-precision "
5285 "unsupported.\n");
5286 return false;
5289 if (!vec_stmt) /* transformation not required. */
5291 if (slp_node
5292 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5294 if (dump_enabled_p ())
5295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5296 "incompatible vector types for invariants\n");
5297 return false;
5299 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5300 DUMP_VECT_SCOPE ("vectorizable_assignment");
5301 if (!vect_nop_conversion_p (stmt_info))
5302 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5303 cost_vec);
5304 return true;
5307 /* Transform. */
5308 if (dump_enabled_p ())
5309 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5311 /* Handle def. */
5312 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5314 /* Handle use. */
5315 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5317 /* Arguments are ready. create the new vector stmt. */
5318 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5320 if (CONVERT_EXPR_CODE_P (code)
5321 || code == VIEW_CONVERT_EXPR)
5322 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5323 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5324 new_temp = make_ssa_name (vec_dest, new_stmt);
5325 gimple_assign_set_lhs (new_stmt, new_temp);
5326 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5327 if (slp_node)
5328 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5329 else
5330 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5332 if (!slp_node)
5333 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5335 vec_oprnds.release ();
5336 return true;
5340 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5341 either as shift by a scalar or by a vector. */
5343 bool
5344 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5347 machine_mode vec_mode;
5348 optab optab;
5349 int icode;
5350 tree vectype;
5352 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5353 if (!vectype)
5354 return false;
5356 optab = optab_for_tree_code (code, vectype, optab_scalar);
5357 if (!optab
5358 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5360 optab = optab_for_tree_code (code, vectype, optab_vector);
5361 if (!optab
5362 || (optab_handler (optab, TYPE_MODE (vectype))
5363 == CODE_FOR_nothing))
5364 return false;
5367 vec_mode = TYPE_MODE (vectype);
5368 icode = (int) optab_handler (optab, vec_mode);
5369 if (icode == CODE_FOR_nothing)
5370 return false;
5372 return true;
5376 /* Function vectorizable_shift.
5378 Check if STMT_INFO performs a shift operation that can be vectorized.
5379 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5380 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5381 Return true if STMT_INFO is vectorizable in this way. */
5383 static bool
5384 vectorizable_shift (vec_info *vinfo,
5385 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5386 gimple **vec_stmt, slp_tree slp_node,
5387 stmt_vector_for_cost *cost_vec)
5389 tree vec_dest;
5390 tree scalar_dest;
5391 tree op0, op1 = NULL;
5392 tree vec_oprnd1 = NULL_TREE;
5393 tree vectype;
5394 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5395 enum tree_code code;
5396 machine_mode vec_mode;
5397 tree new_temp;
5398 optab optab;
5399 int icode;
5400 machine_mode optab_op2_mode;
5401 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5402 int ndts = 2;
5403 poly_uint64 nunits_in;
5404 poly_uint64 nunits_out;
5405 tree vectype_out;
5406 tree op1_vectype;
5407 int ncopies;
5408 int i;
5409 vec<tree> vec_oprnds0 = vNULL;
5410 vec<tree> vec_oprnds1 = vNULL;
5411 tree vop0, vop1;
5412 unsigned int k;
5413 bool scalar_shift_arg = true;
5414 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5415 bool incompatible_op1_vectype_p = false;
5417 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5418 return false;
5420 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5421 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5422 && ! vec_stmt)
5423 return false;
5425 /* Is STMT a vectorizable binary/unary operation? */
5426 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5427 if (!stmt)
5428 return false;
5430 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5431 return false;
5433 code = gimple_assign_rhs_code (stmt);
5435 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5436 || code == RROTATE_EXPR))
5437 return false;
5439 scalar_dest = gimple_assign_lhs (stmt);
5440 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5441 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5443 if (dump_enabled_p ())
5444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5445 "bit-precision shifts not supported.\n");
5446 return false;
5449 slp_tree slp_op0;
5450 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5451 0, &op0, &slp_op0, &dt[0], &vectype))
5453 if (dump_enabled_p ())
5454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5455 "use not simple.\n");
5456 return false;
5458 /* If op0 is an external or constant def, infer the vector type
5459 from the scalar type. */
5460 if (!vectype)
5461 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5462 if (vec_stmt)
5463 gcc_assert (vectype);
5464 if (!vectype)
5466 if (dump_enabled_p ())
5467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5468 "no vectype for scalar type\n");
5469 return false;
5472 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5473 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5474 if (maybe_ne (nunits_out, nunits_in))
5475 return false;
5477 stmt_vec_info op1_def_stmt_info;
5478 slp_tree slp_op1;
5479 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5480 &dt[1], &op1_vectype, &op1_def_stmt_info))
5482 if (dump_enabled_p ())
5483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5484 "use not simple.\n");
5485 return false;
5488 /* Multiple types in SLP are handled by creating the appropriate number of
5489 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5490 case of SLP. */
5491 if (slp_node)
5492 ncopies = 1;
5493 else
5494 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5496 gcc_assert (ncopies >= 1);
5498 /* Determine whether the shift amount is a vector, or scalar. If the
5499 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5501 if ((dt[1] == vect_internal_def
5502 || dt[1] == vect_induction_def
5503 || dt[1] == vect_nested_cycle)
5504 && !slp_node)
5505 scalar_shift_arg = false;
5506 else if (dt[1] == vect_constant_def
5507 || dt[1] == vect_external_def
5508 || dt[1] == vect_internal_def)
5510 /* In SLP, need to check whether the shift count is the same,
5511 in loops if it is a constant or invariant, it is always
5512 a scalar shift. */
5513 if (slp_node)
5515 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5516 stmt_vec_info slpstmt_info;
5518 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5520 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5521 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5522 scalar_shift_arg = false;
5525 /* For internal SLP defs we have to make sure we see scalar stmts
5526 for all vector elements.
5527 ??? For different vectors we could resort to a different
5528 scalar shift operand but code-generation below simply always
5529 takes the first. */
5530 if (dt[1] == vect_internal_def
5531 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5532 stmts.length ()))
5533 scalar_shift_arg = false;
5536 /* If the shift amount is computed by a pattern stmt we cannot
5537 use the scalar amount directly thus give up and use a vector
5538 shift. */
5539 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5540 scalar_shift_arg = false;
5542 else
5544 if (dump_enabled_p ())
5545 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5546 "operand mode requires invariant argument.\n");
5547 return false;
5550 /* Vector shifted by vector. */
5551 bool was_scalar_shift_arg = scalar_shift_arg;
5552 if (!scalar_shift_arg)
5554 optab = optab_for_tree_code (code, vectype, optab_vector);
5555 if (dump_enabled_p ())
5556 dump_printf_loc (MSG_NOTE, vect_location,
5557 "vector/vector shift/rotate found.\n");
5559 if (!op1_vectype)
5560 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5561 slp_op1);
5562 incompatible_op1_vectype_p
5563 = (op1_vectype == NULL_TREE
5564 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5565 TYPE_VECTOR_SUBPARTS (vectype))
5566 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5567 if (incompatible_op1_vectype_p
5568 && (!slp_node
5569 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5570 || slp_op1->refcnt != 1))
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5574 "unusable type for last operand in"
5575 " vector/vector shift/rotate.\n");
5576 return false;
5579 /* See if the machine has a vector shifted by scalar insn and if not
5580 then see if it has a vector shifted by vector insn. */
5581 else
5583 optab = optab_for_tree_code (code, vectype, optab_scalar);
5584 if (optab
5585 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5587 if (dump_enabled_p ())
5588 dump_printf_loc (MSG_NOTE, vect_location,
5589 "vector/scalar shift/rotate found.\n");
5591 else
5593 optab = optab_for_tree_code (code, vectype, optab_vector);
5594 if (optab
5595 && (optab_handler (optab, TYPE_MODE (vectype))
5596 != CODE_FOR_nothing))
5598 scalar_shift_arg = false;
5600 if (dump_enabled_p ())
5601 dump_printf_loc (MSG_NOTE, vect_location,
5602 "vector/vector shift/rotate found.\n");
5604 if (!op1_vectype)
5605 op1_vectype = get_vectype_for_scalar_type (vinfo,
5606 TREE_TYPE (op1),
5607 slp_op1);
5609 /* Unlike the other binary operators, shifts/rotates have
5610 the rhs being int, instead of the same type as the lhs,
5611 so make sure the scalar is the right type if we are
5612 dealing with vectors of long long/long/short/char. */
5613 incompatible_op1_vectype_p
5614 = (!op1_vectype
5615 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5616 TREE_TYPE (op1)));
5617 if (incompatible_op1_vectype_p
5618 && dt[1] == vect_internal_def)
5620 if (dump_enabled_p ())
5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5622 "unusable type for last operand in"
5623 " vector/vector shift/rotate.\n");
5624 return false;
5630 /* Supportable by target? */
5631 if (!optab)
5633 if (dump_enabled_p ())
5634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5635 "no optab.\n");
5636 return false;
5638 vec_mode = TYPE_MODE (vectype);
5639 icode = (int) optab_handler (optab, vec_mode);
5640 if (icode == CODE_FOR_nothing)
5642 if (dump_enabled_p ())
5643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5644 "op not supported by target.\n");
5645 /* Check only during analysis. */
5646 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5647 || (!vec_stmt
5648 && !vect_worthwhile_without_simd_p (vinfo, code)))
5649 return false;
5650 if (dump_enabled_p ())
5651 dump_printf_loc (MSG_NOTE, vect_location,
5652 "proceeding using word mode.\n");
5655 /* Worthwhile without SIMD support? Check only during analysis. */
5656 if (!vec_stmt
5657 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5658 && !vect_worthwhile_without_simd_p (vinfo, code))
5660 if (dump_enabled_p ())
5661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5662 "not worthwhile without SIMD support.\n");
5663 return false;
5666 if (!vec_stmt) /* transformation not required. */
5668 if (slp_node
5669 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5670 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5671 && (!incompatible_op1_vectype_p
5672 || dt[1] == vect_constant_def)
5673 && !vect_maybe_update_slp_op_vectype
5674 (slp_op1,
5675 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5677 if (dump_enabled_p ())
5678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5679 "incompatible vector types for invariants\n");
5680 return false;
5682 /* Now adjust the constant shift amount in place. */
5683 if (slp_node
5684 && incompatible_op1_vectype_p
5685 && dt[1] == vect_constant_def)
5687 for (unsigned i = 0;
5688 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5690 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5691 = fold_convert (TREE_TYPE (vectype),
5692 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5693 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5694 == INTEGER_CST));
5697 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5698 DUMP_VECT_SCOPE ("vectorizable_shift");
5699 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5700 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5701 return true;
5704 /* Transform. */
5706 if (dump_enabled_p ())
5707 dump_printf_loc (MSG_NOTE, vect_location,
5708 "transform binary/unary operation.\n");
5710 if (incompatible_op1_vectype_p && !slp_node)
5712 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5713 op1 = fold_convert (TREE_TYPE (vectype), op1);
5714 if (dt[1] != vect_constant_def)
5715 op1 = vect_init_vector (vinfo, stmt_info, op1,
5716 TREE_TYPE (vectype), NULL);
5719 /* Handle def. */
5720 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5722 if (scalar_shift_arg && dt[1] != vect_internal_def)
5724 /* Vector shl and shr insn patterns can be defined with scalar
5725 operand 2 (shift operand). In this case, use constant or loop
5726 invariant op1 directly, without extending it to vector mode
5727 first. */
5728 optab_op2_mode = insn_data[icode].operand[2].mode;
5729 if (!VECTOR_MODE_P (optab_op2_mode))
5731 if (dump_enabled_p ())
5732 dump_printf_loc (MSG_NOTE, vect_location,
5733 "operand 1 using scalar mode.\n");
5734 vec_oprnd1 = op1;
5735 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5736 vec_oprnds1.quick_push (vec_oprnd1);
5737 /* Store vec_oprnd1 for every vector stmt to be created.
5738 We check during the analysis that all the shift arguments
5739 are the same.
5740 TODO: Allow different constants for different vector
5741 stmts generated for an SLP instance. */
5742 for (k = 0;
5743 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5744 vec_oprnds1.quick_push (vec_oprnd1);
5747 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5749 if (was_scalar_shift_arg)
5751 /* If the argument was the same in all lanes create
5752 the correctly typed vector shift amount directly. */
5753 op1 = fold_convert (TREE_TYPE (vectype), op1);
5754 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5755 !loop_vinfo ? gsi : NULL);
5756 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5757 !loop_vinfo ? gsi : NULL);
5758 vec_oprnds1.create (slp_node->vec_stmts_size);
5759 for (k = 0; k < slp_node->vec_stmts_size; k++)
5760 vec_oprnds1.quick_push (vec_oprnd1);
5762 else if (dt[1] == vect_constant_def)
5763 /* The constant shift amount has been adjusted in place. */
5765 else
5766 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5769 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5770 (a special case for certain kind of vector shifts); otherwise,
5771 operand 1 should be of a vector type (the usual case). */
5772 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5773 op0, &vec_oprnds0,
5774 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5776 /* Arguments are ready. Create the new vector stmt. */
5777 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5779 /* For internal defs where we need to use a scalar shift arg
5780 extract the first lane. */
5781 if (scalar_shift_arg && dt[1] == vect_internal_def)
5783 vop1 = vec_oprnds1[0];
5784 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5785 gassign *new_stmt
5786 = gimple_build_assign (new_temp,
5787 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5788 vop1,
5789 TYPE_SIZE (TREE_TYPE (new_temp)),
5790 bitsize_zero_node));
5791 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5792 vop1 = new_temp;
5794 else
5795 vop1 = vec_oprnds1[i];
5796 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5797 new_temp = make_ssa_name (vec_dest, new_stmt);
5798 gimple_assign_set_lhs (new_stmt, new_temp);
5799 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5800 if (slp_node)
5801 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5802 else
5803 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5806 if (!slp_node)
5807 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5809 vec_oprnds0.release ();
5810 vec_oprnds1.release ();
5812 return true;
5816 /* Function vectorizable_operation.
5818 Check if STMT_INFO performs a binary, unary or ternary operation that can
5819 be vectorized.
5820 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5821 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5822 Return true if STMT_INFO is vectorizable in this way. */
5824 static bool
5825 vectorizable_operation (vec_info *vinfo,
5826 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5827 gimple **vec_stmt, slp_tree slp_node,
5828 stmt_vector_for_cost *cost_vec)
5830 tree vec_dest;
5831 tree scalar_dest;
5832 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5833 tree vectype;
5834 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5835 enum tree_code code, orig_code;
5836 machine_mode vec_mode;
5837 tree new_temp;
5838 int op_type;
5839 optab optab;
5840 bool target_support_p;
5841 enum vect_def_type dt[3]
5842 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5843 int ndts = 3;
5844 poly_uint64 nunits_in;
5845 poly_uint64 nunits_out;
5846 tree vectype_out;
5847 int ncopies, vec_num;
5848 int i;
5849 vec<tree> vec_oprnds0 = vNULL;
5850 vec<tree> vec_oprnds1 = vNULL;
5851 vec<tree> vec_oprnds2 = vNULL;
5852 tree vop0, vop1, vop2;
5853 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5855 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5856 return false;
5858 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5859 && ! vec_stmt)
5860 return false;
5862 /* Is STMT a vectorizable binary/unary operation? */
5863 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5864 if (!stmt)
5865 return false;
5867 /* Loads and stores are handled in vectorizable_{load,store}. */
5868 if (STMT_VINFO_DATA_REF (stmt_info))
5869 return false;
5871 orig_code = code = gimple_assign_rhs_code (stmt);
5873 /* Shifts are handled in vectorizable_shift. */
5874 if (code == LSHIFT_EXPR
5875 || code == RSHIFT_EXPR
5876 || code == LROTATE_EXPR
5877 || code == RROTATE_EXPR)
5878 return false;
5880 /* Comparisons are handled in vectorizable_comparison. */
5881 if (TREE_CODE_CLASS (code) == tcc_comparison)
5882 return false;
5884 /* Conditions are handled in vectorizable_condition. */
5885 if (code == COND_EXPR)
5886 return false;
5888 /* For pointer addition and subtraction, we should use the normal
5889 plus and minus for the vector operation. */
5890 if (code == POINTER_PLUS_EXPR)
5891 code = PLUS_EXPR;
5892 if (code == POINTER_DIFF_EXPR)
5893 code = MINUS_EXPR;
5895 /* Support only unary or binary operations. */
5896 op_type = TREE_CODE_LENGTH (code);
5897 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5899 if (dump_enabled_p ())
5900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5901 "num. args = %d (not unary/binary/ternary op).\n",
5902 op_type);
5903 return false;
5906 scalar_dest = gimple_assign_lhs (stmt);
5907 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5909 /* Most operations cannot handle bit-precision types without extra
5910 truncations. */
5911 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5912 if (!mask_op_p
5913 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5914 /* Exception are bitwise binary operations. */
5915 && code != BIT_IOR_EXPR
5916 && code != BIT_XOR_EXPR
5917 && code != BIT_AND_EXPR)
5919 if (dump_enabled_p ())
5920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5921 "bit-precision arithmetic not supported.\n");
5922 return false;
5925 slp_tree slp_op0;
5926 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5927 0, &op0, &slp_op0, &dt[0], &vectype))
5929 if (dump_enabled_p ())
5930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5931 "use not simple.\n");
5932 return false;
5934 /* If op0 is an external or constant def, infer the vector type
5935 from the scalar type. */
5936 if (!vectype)
5938 /* For boolean type we cannot determine vectype by
5939 invariant value (don't know whether it is a vector
5940 of booleans or vector of integers). We use output
5941 vectype because operations on boolean don't change
5942 type. */
5943 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5945 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5947 if (dump_enabled_p ())
5948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5949 "not supported operation on bool value.\n");
5950 return false;
5952 vectype = vectype_out;
5954 else
5955 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5956 slp_node);
5958 if (vec_stmt)
5959 gcc_assert (vectype);
5960 if (!vectype)
5962 if (dump_enabled_p ())
5963 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5964 "no vectype for scalar type %T\n",
5965 TREE_TYPE (op0));
5967 return false;
5970 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5971 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5972 if (maybe_ne (nunits_out, nunits_in))
5973 return false;
5975 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5976 slp_tree slp_op1 = NULL, slp_op2 = NULL;
5977 if (op_type == binary_op || op_type == ternary_op)
5979 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5980 1, &op1, &slp_op1, &dt[1], &vectype2))
5982 if (dump_enabled_p ())
5983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5984 "use not simple.\n");
5985 return false;
5988 if (op_type == ternary_op)
5990 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5991 2, &op2, &slp_op2, &dt[2], &vectype3))
5993 if (dump_enabled_p ())
5994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5995 "use not simple.\n");
5996 return false;
6000 /* Multiple types in SLP are handled by creating the appropriate number of
6001 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6002 case of SLP. */
6003 if (slp_node)
6005 ncopies = 1;
6006 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6008 else
6010 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6011 vec_num = 1;
6014 gcc_assert (ncopies >= 1);
6016 /* Reject attempts to combine mask types with nonmask types, e.g. if
6017 we have an AND between a (nonmask) boolean loaded from memory and
6018 a (mask) boolean result of a comparison.
6020 TODO: We could easily fix these cases up using pattern statements. */
6021 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6022 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6023 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6025 if (dump_enabled_p ())
6026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6027 "mixed mask and nonmask vector types\n");
6028 return false;
6031 /* Supportable by target? */
6033 vec_mode = TYPE_MODE (vectype);
6034 if (code == MULT_HIGHPART_EXPR)
6035 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6036 else
6038 optab = optab_for_tree_code (code, vectype, optab_default);
6039 if (!optab)
6041 if (dump_enabled_p ())
6042 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6043 "no optab.\n");
6044 return false;
6046 target_support_p = (optab_handler (optab, vec_mode)
6047 != CODE_FOR_nothing);
6050 if (!target_support_p)
6052 if (dump_enabled_p ())
6053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6054 "op not supported by target.\n");
6055 /* Check only during analysis. */
6056 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6057 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6058 return false;
6059 if (dump_enabled_p ())
6060 dump_printf_loc (MSG_NOTE, vect_location,
6061 "proceeding using word mode.\n");
6064 /* Worthwhile without SIMD support? Check only during analysis. */
6065 if (!VECTOR_MODE_P (vec_mode)
6066 && !vec_stmt
6067 && !vect_worthwhile_without_simd_p (vinfo, code))
6069 if (dump_enabled_p ())
6070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6071 "not worthwhile without SIMD support.\n");
6072 return false;
6075 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6076 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6077 internal_fn cond_fn = get_conditional_internal_fn (code);
6079 if (!vec_stmt) /* transformation not required. */
6081 /* If this operation is part of a reduction, a fully-masked loop
6082 should only change the active lanes of the reduction chain,
6083 keeping the inactive lanes as-is. */
6084 if (loop_vinfo
6085 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6086 && reduc_idx >= 0)
6088 if (cond_fn == IFN_LAST
6089 || !direct_internal_fn_supported_p (cond_fn, vectype,
6090 OPTIMIZE_FOR_SPEED))
6092 if (dump_enabled_p ())
6093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6094 "can't use a fully-masked loop because no"
6095 " conditional operation is available.\n");
6096 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6098 else
6099 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6100 vectype, NULL);
6103 /* Put types on constant and invariant SLP children. */
6104 if (slp_node
6105 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6106 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6107 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6109 if (dump_enabled_p ())
6110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6111 "incompatible vector types for invariants\n");
6112 return false;
6115 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6116 DUMP_VECT_SCOPE ("vectorizable_operation");
6117 vect_model_simple_cost (vinfo, stmt_info,
6118 ncopies, dt, ndts, slp_node, cost_vec);
6119 return true;
6122 /* Transform. */
6124 if (dump_enabled_p ())
6125 dump_printf_loc (MSG_NOTE, vect_location,
6126 "transform binary/unary operation.\n");
6128 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6130 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6131 vectors with unsigned elements, but the result is signed. So, we
6132 need to compute the MINUS_EXPR into vectype temporary and
6133 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6134 tree vec_cvt_dest = NULL_TREE;
6135 if (orig_code == POINTER_DIFF_EXPR)
6137 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6138 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6140 /* Handle def. */
6141 else
6142 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6144 /* In case the vectorization factor (VF) is bigger than the number
6145 of elements that we can fit in a vectype (nunits), we have to generate
6146 more than one vector stmt - i.e - we need to "unroll" the
6147 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6148 from one copy of the vector stmt to the next, in the field
6149 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6150 stages to find the correct vector defs to be used when vectorizing
6151 stmts that use the defs of the current stmt. The example below
6152 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6153 we need to create 4 vectorized stmts):
6155 before vectorization:
6156 RELATED_STMT VEC_STMT
6157 S1: x = memref - -
6158 S2: z = x + 1 - -
6160 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6161 there):
6162 RELATED_STMT VEC_STMT
6163 VS1_0: vx0 = memref0 VS1_1 -
6164 VS1_1: vx1 = memref1 VS1_2 -
6165 VS1_2: vx2 = memref2 VS1_3 -
6166 VS1_3: vx3 = memref3 - -
6167 S1: x = load - VS1_0
6168 S2: z = x + 1 - -
6170 step2: vectorize stmt S2 (done here):
6171 To vectorize stmt S2 we first need to find the relevant vector
6172 def for the first operand 'x'. This is, as usual, obtained from
6173 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6174 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6175 relevant vector def 'vx0'. Having found 'vx0' we can generate
6176 the vector stmt VS2_0, and as usual, record it in the
6177 STMT_VINFO_VEC_STMT of stmt S2.
6178 When creating the second copy (VS2_1), we obtain the relevant vector
6179 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6180 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6181 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6182 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6183 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6184 chain of stmts and pointers:
6185 RELATED_STMT VEC_STMT
6186 VS1_0: vx0 = memref0 VS1_1 -
6187 VS1_1: vx1 = memref1 VS1_2 -
6188 VS1_2: vx2 = memref2 VS1_3 -
6189 VS1_3: vx3 = memref3 - -
6190 S1: x = load - VS1_0
6191 VS2_0: vz0 = vx0 + v1 VS2_1 -
6192 VS2_1: vz1 = vx1 + v1 VS2_2 -
6193 VS2_2: vz2 = vx2 + v1 VS2_3 -
6194 VS2_3: vz3 = vx3 + v1 - -
6195 S2: z = x + 1 - VS2_0 */
6197 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6198 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6199 /* Arguments are ready. Create the new vector stmt. */
6200 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6202 gimple *new_stmt = NULL;
6203 vop1 = ((op_type == binary_op || op_type == ternary_op)
6204 ? vec_oprnds1[i] : NULL_TREE);
6205 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6206 if (masked_loop_p && reduc_idx >= 0)
6208 /* Perform the operation on active elements only and take
6209 inactive elements from the reduction chain input. */
6210 gcc_assert (!vop2);
6211 vop2 = reduc_idx == 1 ? vop1 : vop0;
6212 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6213 vectype, i);
6214 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6215 vop0, vop1, vop2);
6216 new_temp = make_ssa_name (vec_dest, call);
6217 gimple_call_set_lhs (call, new_temp);
6218 gimple_call_set_nothrow (call, true);
6219 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6220 new_stmt = call;
6222 else
6224 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6225 new_temp = make_ssa_name (vec_dest, new_stmt);
6226 gimple_assign_set_lhs (new_stmt, new_temp);
6227 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6228 if (vec_cvt_dest)
6230 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6231 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6232 new_temp);
6233 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6234 gimple_assign_set_lhs (new_stmt, new_temp);
6235 vect_finish_stmt_generation (vinfo, stmt_info,
6236 new_stmt, gsi);
6239 if (slp_node)
6240 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6241 else
6242 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6245 if (!slp_node)
6246 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6248 vec_oprnds0.release ();
6249 vec_oprnds1.release ();
6250 vec_oprnds2.release ();
6252 return true;
6255 /* A helper function to ensure data reference DR_INFO's base alignment. */
6257 static void
6258 ensure_base_align (dr_vec_info *dr_info)
6260 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6261 return;
6263 if (dr_info->base_misaligned)
6265 tree base_decl = dr_info->base_decl;
6267 // We should only be able to increase the alignment of a base object if
6268 // we know what its new alignment should be at compile time.
6269 unsigned HOST_WIDE_INT align_base_to =
6270 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6272 if (decl_in_symtab_p (base_decl))
6273 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6274 else if (DECL_ALIGN (base_decl) < align_base_to)
6276 SET_DECL_ALIGN (base_decl, align_base_to);
6277 DECL_USER_ALIGN (base_decl) = 1;
6279 dr_info->base_misaligned = false;
6284 /* Function get_group_alias_ptr_type.
6286 Return the alias type for the group starting at FIRST_STMT_INFO. */
6288 static tree
6289 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6291 struct data_reference *first_dr, *next_dr;
6293 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6294 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6295 while (next_stmt_info)
6297 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6298 if (get_alias_set (DR_REF (first_dr))
6299 != get_alias_set (DR_REF (next_dr)))
6301 if (dump_enabled_p ())
6302 dump_printf_loc (MSG_NOTE, vect_location,
6303 "conflicting alias set types.\n");
6304 return ptr_type_node;
6306 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6308 return reference_alias_ptr_type (DR_REF (first_dr));
6312 /* Function scan_operand_equal_p.
6314 Helper function for check_scan_store. Compare two references
6315 with .GOMP_SIMD_LANE bases. */
6317 static bool
6318 scan_operand_equal_p (tree ref1, tree ref2)
6320 tree ref[2] = { ref1, ref2 };
6321 poly_int64 bitsize[2], bitpos[2];
6322 tree offset[2], base[2];
6323 for (int i = 0; i < 2; ++i)
6325 machine_mode mode;
6326 int unsignedp, reversep, volatilep = 0;
6327 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6328 &offset[i], &mode, &unsignedp,
6329 &reversep, &volatilep);
6330 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6331 return false;
6332 if (TREE_CODE (base[i]) == MEM_REF
6333 && offset[i] == NULL_TREE
6334 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6336 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6337 if (is_gimple_assign (def_stmt)
6338 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6339 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6340 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6342 if (maybe_ne (mem_ref_offset (base[i]), 0))
6343 return false;
6344 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6345 offset[i] = gimple_assign_rhs2 (def_stmt);
6350 if (!operand_equal_p (base[0], base[1], 0))
6351 return false;
6352 if (maybe_ne (bitsize[0], bitsize[1]))
6353 return false;
6354 if (offset[0] != offset[1])
6356 if (!offset[0] || !offset[1])
6357 return false;
6358 if (!operand_equal_p (offset[0], offset[1], 0))
6360 tree step[2];
6361 for (int i = 0; i < 2; ++i)
6363 step[i] = integer_one_node;
6364 if (TREE_CODE (offset[i]) == SSA_NAME)
6366 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6367 if (is_gimple_assign (def_stmt)
6368 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6369 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6370 == INTEGER_CST))
6372 step[i] = gimple_assign_rhs2 (def_stmt);
6373 offset[i] = gimple_assign_rhs1 (def_stmt);
6376 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6378 step[i] = TREE_OPERAND (offset[i], 1);
6379 offset[i] = TREE_OPERAND (offset[i], 0);
6381 tree rhs1 = NULL_TREE;
6382 if (TREE_CODE (offset[i]) == SSA_NAME)
6384 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6385 if (gimple_assign_cast_p (def_stmt))
6386 rhs1 = gimple_assign_rhs1 (def_stmt);
6388 else if (CONVERT_EXPR_P (offset[i]))
6389 rhs1 = TREE_OPERAND (offset[i], 0);
6390 if (rhs1
6391 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6392 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6393 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6394 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6395 offset[i] = rhs1;
6397 if (!operand_equal_p (offset[0], offset[1], 0)
6398 || !operand_equal_p (step[0], step[1], 0))
6399 return false;
6402 return true;
6406 enum scan_store_kind {
6407 /* Normal permutation. */
6408 scan_store_kind_perm,
6410 /* Whole vector left shift permutation with zero init. */
6411 scan_store_kind_lshift_zero,
6413 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6414 scan_store_kind_lshift_cond
6417 /* Function check_scan_store.
6419 Verify if we can perform the needed permutations or whole vector shifts.
6420 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6421 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6422 to do at each step. */
6424 static int
6425 scan_store_can_perm_p (tree vectype, tree init,
6426 vec<enum scan_store_kind> *use_whole_vector = NULL)
6428 enum machine_mode vec_mode = TYPE_MODE (vectype);
6429 unsigned HOST_WIDE_INT nunits;
6430 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6431 return -1;
6432 int units_log2 = exact_log2 (nunits);
6433 if (units_log2 <= 0)
6434 return -1;
6436 int i;
6437 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6438 for (i = 0; i <= units_log2; ++i)
6440 unsigned HOST_WIDE_INT j, k;
6441 enum scan_store_kind kind = scan_store_kind_perm;
6442 vec_perm_builder sel (nunits, nunits, 1);
6443 sel.quick_grow (nunits);
6444 if (i == units_log2)
6446 for (j = 0; j < nunits; ++j)
6447 sel[j] = nunits - 1;
6449 else
6451 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6452 sel[j] = j;
6453 for (k = 0; j < nunits; ++j, ++k)
6454 sel[j] = nunits + k;
6456 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6457 if (!can_vec_perm_const_p (vec_mode, indices))
6459 if (i == units_log2)
6460 return -1;
6462 if (whole_vector_shift_kind == scan_store_kind_perm)
6464 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6465 return -1;
6466 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6467 /* Whole vector shifts shift in zeros, so if init is all zero
6468 constant, there is no need to do anything further. */
6469 if ((TREE_CODE (init) != INTEGER_CST
6470 && TREE_CODE (init) != REAL_CST)
6471 || !initializer_zerop (init))
6473 tree masktype = truth_type_for (vectype);
6474 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6475 return -1;
6476 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6479 kind = whole_vector_shift_kind;
6481 if (use_whole_vector)
6483 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6484 use_whole_vector->safe_grow_cleared (i, true);
6485 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6486 use_whole_vector->safe_push (kind);
6490 return units_log2;
6494 /* Function check_scan_store.
6496 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6498 static bool
6499 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6500 enum vect_def_type rhs_dt, bool slp, tree mask,
6501 vect_memory_access_type memory_access_type)
6503 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6504 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6505 tree ref_type;
6507 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6508 if (slp
6509 || mask
6510 || memory_access_type != VMAT_CONTIGUOUS
6511 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6512 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6513 || loop_vinfo == NULL
6514 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6515 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6516 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6517 || !integer_zerop (DR_INIT (dr_info->dr))
6518 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6519 || !alias_sets_conflict_p (get_alias_set (vectype),
6520 get_alias_set (TREE_TYPE (ref_type))))
6522 if (dump_enabled_p ())
6523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6524 "unsupported OpenMP scan store.\n");
6525 return false;
6528 /* We need to pattern match code built by OpenMP lowering and simplified
6529 by following optimizations into something we can handle.
6530 #pragma omp simd reduction(inscan,+:r)
6531 for (...)
6533 r += something ();
6534 #pragma omp scan inclusive (r)
6535 use (r);
6537 shall have body with:
6538 // Initialization for input phase, store the reduction initializer:
6539 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6540 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6541 D.2042[_21] = 0;
6542 // Actual input phase:
6544 r.0_5 = D.2042[_20];
6545 _6 = _4 + r.0_5;
6546 D.2042[_20] = _6;
6547 // Initialization for scan phase:
6548 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6549 _26 = D.2043[_25];
6550 _27 = D.2042[_25];
6551 _28 = _26 + _27;
6552 D.2043[_25] = _28;
6553 D.2042[_25] = _28;
6554 // Actual scan phase:
6556 r.1_8 = D.2042[_20];
6558 The "omp simd array" variable D.2042 holds the privatized copy used
6559 inside of the loop and D.2043 is another one that holds copies of
6560 the current original list item. The separate GOMP_SIMD_LANE ifn
6561 kinds are there in order to allow optimizing the initializer store
6562 and combiner sequence, e.g. if it is originally some C++ish user
6563 defined reduction, but allow the vectorizer to pattern recognize it
6564 and turn into the appropriate vectorized scan.
6566 For exclusive scan, this is slightly different:
6567 #pragma omp simd reduction(inscan,+:r)
6568 for (...)
6570 use (r);
6571 #pragma omp scan exclusive (r)
6572 r += something ();
6574 shall have body with:
6575 // Initialization for input phase, store the reduction initializer:
6576 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6577 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6578 D.2042[_21] = 0;
6579 // Actual input phase:
6581 r.0_5 = D.2042[_20];
6582 _6 = _4 + r.0_5;
6583 D.2042[_20] = _6;
6584 // Initialization for scan phase:
6585 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6586 _26 = D.2043[_25];
6587 D.2044[_25] = _26;
6588 _27 = D.2042[_25];
6589 _28 = _26 + _27;
6590 D.2043[_25] = _28;
6591 // Actual scan phase:
6593 r.1_8 = D.2044[_20];
6594 ... */
6596 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6598 /* Match the D.2042[_21] = 0; store above. Just require that
6599 it is a constant or external definition store. */
6600 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6602 fail_init:
6603 if (dump_enabled_p ())
6604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6605 "unsupported OpenMP scan initializer store.\n");
6606 return false;
6609 if (! loop_vinfo->scan_map)
6610 loop_vinfo->scan_map = new hash_map<tree, tree>;
6611 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6612 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6613 if (cached)
6614 goto fail_init;
6615 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6617 /* These stores can be vectorized normally. */
6618 return true;
6621 if (rhs_dt != vect_internal_def)
6623 fail:
6624 if (dump_enabled_p ())
6625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6626 "unsupported OpenMP scan combiner pattern.\n");
6627 return false;
6630 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6631 tree rhs = gimple_assign_rhs1 (stmt);
6632 if (TREE_CODE (rhs) != SSA_NAME)
6633 goto fail;
6635 gimple *other_store_stmt = NULL;
6636 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6637 bool inscan_var_store
6638 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6640 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6642 if (!inscan_var_store)
6644 use_operand_p use_p;
6645 imm_use_iterator iter;
6646 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6648 gimple *use_stmt = USE_STMT (use_p);
6649 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6650 continue;
6651 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6652 || !is_gimple_assign (use_stmt)
6653 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6654 || other_store_stmt
6655 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6656 goto fail;
6657 other_store_stmt = use_stmt;
6659 if (other_store_stmt == NULL)
6660 goto fail;
6661 rhs = gimple_assign_lhs (other_store_stmt);
6662 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6663 goto fail;
6666 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6668 use_operand_p use_p;
6669 imm_use_iterator iter;
6670 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6672 gimple *use_stmt = USE_STMT (use_p);
6673 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6674 continue;
6675 if (other_store_stmt)
6676 goto fail;
6677 other_store_stmt = use_stmt;
6680 else
6681 goto fail;
6683 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6684 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6685 || !is_gimple_assign (def_stmt)
6686 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6687 goto fail;
6689 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6690 /* For pointer addition, we should use the normal plus for the vector
6691 operation. */
6692 switch (code)
6694 case POINTER_PLUS_EXPR:
6695 code = PLUS_EXPR;
6696 break;
6697 case MULT_HIGHPART_EXPR:
6698 goto fail;
6699 default:
6700 break;
6702 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6703 goto fail;
6705 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6706 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6707 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6708 goto fail;
6710 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6711 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6712 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6713 || !gimple_assign_load_p (load1_stmt)
6714 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6715 || !gimple_assign_load_p (load2_stmt))
6716 goto fail;
6718 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6719 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6720 if (load1_stmt_info == NULL
6721 || load2_stmt_info == NULL
6722 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6723 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6724 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6725 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6726 goto fail;
6728 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6730 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6731 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6732 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6733 goto fail;
6734 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6735 tree lrhs;
6736 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6737 lrhs = rhs1;
6738 else
6739 lrhs = rhs2;
6740 use_operand_p use_p;
6741 imm_use_iterator iter;
6742 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6744 gimple *use_stmt = USE_STMT (use_p);
6745 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6746 continue;
6747 if (other_store_stmt)
6748 goto fail;
6749 other_store_stmt = use_stmt;
6753 if (other_store_stmt == NULL)
6754 goto fail;
6755 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6756 || !gimple_store_p (other_store_stmt))
6757 goto fail;
6759 stmt_vec_info other_store_stmt_info
6760 = loop_vinfo->lookup_stmt (other_store_stmt);
6761 if (other_store_stmt_info == NULL
6762 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6763 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6764 goto fail;
6766 gimple *stmt1 = stmt;
6767 gimple *stmt2 = other_store_stmt;
6768 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6769 std::swap (stmt1, stmt2);
6770 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6771 gimple_assign_rhs1 (load2_stmt)))
6773 std::swap (rhs1, rhs2);
6774 std::swap (load1_stmt, load2_stmt);
6775 std::swap (load1_stmt_info, load2_stmt_info);
6777 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6778 gimple_assign_rhs1 (load1_stmt)))
6779 goto fail;
6781 tree var3 = NULL_TREE;
6782 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6783 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6784 gimple_assign_rhs1 (load2_stmt)))
6785 goto fail;
6786 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6788 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6789 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6790 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6791 goto fail;
6792 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6793 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6794 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6795 || lookup_attribute ("omp simd inscan exclusive",
6796 DECL_ATTRIBUTES (var3)))
6797 goto fail;
6800 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6801 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6802 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6803 goto fail;
6805 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6806 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6807 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6808 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6809 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6810 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6811 goto fail;
6813 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6814 std::swap (var1, var2);
6816 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6818 if (!lookup_attribute ("omp simd inscan exclusive",
6819 DECL_ATTRIBUTES (var1)))
6820 goto fail;
6821 var1 = var3;
6824 if (loop_vinfo->scan_map == NULL)
6825 goto fail;
6826 tree *init = loop_vinfo->scan_map->get (var1);
6827 if (init == NULL)
6828 goto fail;
6830 /* The IL is as expected, now check if we can actually vectorize it.
6831 Inclusive scan:
6832 _26 = D.2043[_25];
6833 _27 = D.2042[_25];
6834 _28 = _26 + _27;
6835 D.2043[_25] = _28;
6836 D.2042[_25] = _28;
6837 should be vectorized as (where _40 is the vectorized rhs
6838 from the D.2042[_21] = 0; store):
6839 _30 = MEM <vector(8) int> [(int *)&D.2043];
6840 _31 = MEM <vector(8) int> [(int *)&D.2042];
6841 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6842 _33 = _31 + _32;
6843 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6844 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6845 _35 = _33 + _34;
6846 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6847 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6848 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6849 _37 = _35 + _36;
6850 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6851 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6852 _38 = _30 + _37;
6853 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6854 MEM <vector(8) int> [(int *)&D.2043] = _39;
6855 MEM <vector(8) int> [(int *)&D.2042] = _38;
6856 Exclusive scan:
6857 _26 = D.2043[_25];
6858 D.2044[_25] = _26;
6859 _27 = D.2042[_25];
6860 _28 = _26 + _27;
6861 D.2043[_25] = _28;
6862 should be vectorized as (where _40 is the vectorized rhs
6863 from the D.2042[_21] = 0; store):
6864 _30 = MEM <vector(8) int> [(int *)&D.2043];
6865 _31 = MEM <vector(8) int> [(int *)&D.2042];
6866 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6867 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6868 _34 = _32 + _33;
6869 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6870 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6871 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6872 _36 = _34 + _35;
6873 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6874 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6875 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6876 _38 = _36 + _37;
6877 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6878 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6879 _39 = _30 + _38;
6880 _50 = _31 + _39;
6881 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6882 MEM <vector(8) int> [(int *)&D.2044] = _39;
6883 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6884 enum machine_mode vec_mode = TYPE_MODE (vectype);
6885 optab optab = optab_for_tree_code (code, vectype, optab_default);
6886 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6887 goto fail;
6889 int units_log2 = scan_store_can_perm_p (vectype, *init);
6890 if (units_log2 == -1)
6891 goto fail;
6893 return true;
6897 /* Function vectorizable_scan_store.
6899 Helper of vectorizable_score, arguments like on vectorizable_store.
6900 Handle only the transformation, checking is done in check_scan_store. */
6902 static bool
6903 vectorizable_scan_store (vec_info *vinfo,
6904 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6905 gimple **vec_stmt, int ncopies)
6907 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6908 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6909 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6910 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6912 if (dump_enabled_p ())
6913 dump_printf_loc (MSG_NOTE, vect_location,
6914 "transform scan store. ncopies = %d\n", ncopies);
6916 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6917 tree rhs = gimple_assign_rhs1 (stmt);
6918 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6920 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6921 bool inscan_var_store
6922 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6924 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6926 use_operand_p use_p;
6927 imm_use_iterator iter;
6928 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6930 gimple *use_stmt = USE_STMT (use_p);
6931 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6932 continue;
6933 rhs = gimple_assign_lhs (use_stmt);
6934 break;
6938 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6939 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6940 if (code == POINTER_PLUS_EXPR)
6941 code = PLUS_EXPR;
6942 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6943 && commutative_tree_code (code));
6944 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6945 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6946 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6947 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6948 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6949 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6950 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6951 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6952 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6953 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6954 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6956 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6958 std::swap (rhs1, rhs2);
6959 std::swap (var1, var2);
6960 std::swap (load1_dr_info, load2_dr_info);
6963 tree *init = loop_vinfo->scan_map->get (var1);
6964 gcc_assert (init);
6966 unsigned HOST_WIDE_INT nunits;
6967 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6968 gcc_unreachable ();
6969 auto_vec<enum scan_store_kind, 16> use_whole_vector;
6970 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6971 gcc_assert (units_log2 > 0);
6972 auto_vec<tree, 16> perms;
6973 perms.quick_grow (units_log2 + 1);
6974 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6975 for (int i = 0; i <= units_log2; ++i)
6977 unsigned HOST_WIDE_INT j, k;
6978 vec_perm_builder sel (nunits, nunits, 1);
6979 sel.quick_grow (nunits);
6980 if (i == units_log2)
6981 for (j = 0; j < nunits; ++j)
6982 sel[j] = nunits - 1;
6983 else
6985 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6986 sel[j] = j;
6987 for (k = 0; j < nunits; ++j, ++k)
6988 sel[j] = nunits + k;
6990 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6991 if (!use_whole_vector.is_empty ()
6992 && use_whole_vector[i] != scan_store_kind_perm)
6994 if (zero_vec == NULL_TREE)
6995 zero_vec = build_zero_cst (vectype);
6996 if (masktype == NULL_TREE
6997 && use_whole_vector[i] == scan_store_kind_lshift_cond)
6998 masktype = truth_type_for (vectype);
6999 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7001 else
7002 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7005 tree vec_oprnd1 = NULL_TREE;
7006 tree vec_oprnd2 = NULL_TREE;
7007 tree vec_oprnd3 = NULL_TREE;
7008 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7009 tree dataref_offset = build_int_cst (ref_type, 0);
7010 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7011 vectype, VMAT_CONTIGUOUS);
7012 tree ldataref_ptr = NULL_TREE;
7013 tree orig = NULL_TREE;
7014 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7015 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7016 auto_vec<tree> vec_oprnds1;
7017 auto_vec<tree> vec_oprnds2;
7018 auto_vec<tree> vec_oprnds3;
7019 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7020 *init, &vec_oprnds1,
7021 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7022 rhs2, &vec_oprnds3);
7023 for (int j = 0; j < ncopies; j++)
7025 vec_oprnd1 = vec_oprnds1[j];
7026 if (ldataref_ptr == NULL)
7027 vec_oprnd2 = vec_oprnds2[j];
7028 vec_oprnd3 = vec_oprnds3[j];
7029 if (j == 0)
7030 orig = vec_oprnd3;
7031 else if (!inscan_var_store)
7032 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7034 if (ldataref_ptr)
7036 vec_oprnd2 = make_ssa_name (vectype);
7037 tree data_ref = fold_build2 (MEM_REF, vectype,
7038 unshare_expr (ldataref_ptr),
7039 dataref_offset);
7040 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7041 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7042 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7043 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7044 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7047 tree v = vec_oprnd2;
7048 for (int i = 0; i < units_log2; ++i)
7050 tree new_temp = make_ssa_name (vectype);
7051 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7052 (zero_vec
7053 && (use_whole_vector[i]
7054 != scan_store_kind_perm))
7055 ? zero_vec : vec_oprnd1, v,
7056 perms[i]);
7057 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7058 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7059 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7061 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7063 /* Whole vector shift shifted in zero bits, but if *init
7064 is not initializer_zerop, we need to replace those elements
7065 with elements from vec_oprnd1. */
7066 tree_vector_builder vb (masktype, nunits, 1);
7067 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7068 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7069 ? boolean_false_node : boolean_true_node);
7071 tree new_temp2 = make_ssa_name (vectype);
7072 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7073 new_temp, vec_oprnd1);
7074 vect_finish_stmt_generation (vinfo, stmt_info,
7075 g, gsi);
7076 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7077 new_temp = new_temp2;
7080 /* For exclusive scan, perform the perms[i] permutation once
7081 more. */
7082 if (i == 0
7083 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7084 && v == vec_oprnd2)
7086 v = new_temp;
7087 --i;
7088 continue;
7091 tree new_temp2 = make_ssa_name (vectype);
7092 g = gimple_build_assign (new_temp2, code, v, new_temp);
7093 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7094 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7096 v = new_temp2;
7099 tree new_temp = make_ssa_name (vectype);
7100 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7101 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7102 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7104 tree last_perm_arg = new_temp;
7105 /* For exclusive scan, new_temp computed above is the exclusive scan
7106 prefix sum. Turn it into inclusive prefix sum for the broadcast
7107 of the last element into orig. */
7108 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7110 last_perm_arg = make_ssa_name (vectype);
7111 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7112 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7113 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7116 orig = make_ssa_name (vectype);
7117 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7118 last_perm_arg, perms[units_log2]);
7119 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7120 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7122 if (!inscan_var_store)
7124 tree data_ref = fold_build2 (MEM_REF, vectype,
7125 unshare_expr (dataref_ptr),
7126 dataref_offset);
7127 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7128 g = gimple_build_assign (data_ref, new_temp);
7129 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7130 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7134 if (inscan_var_store)
7135 for (int j = 0; j < ncopies; j++)
7137 if (j != 0)
7138 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7140 tree data_ref = fold_build2 (MEM_REF, vectype,
7141 unshare_expr (dataref_ptr),
7142 dataref_offset);
7143 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7144 gimple *g = gimple_build_assign (data_ref, orig);
7145 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7146 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7148 return true;
7152 /* Function vectorizable_store.
7154 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7155 that can be vectorized.
7156 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7157 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7158 Return true if STMT_INFO is vectorizable in this way. */
7160 static bool
7161 vectorizable_store (vec_info *vinfo,
7162 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7163 gimple **vec_stmt, slp_tree slp_node,
7164 stmt_vector_for_cost *cost_vec)
7166 tree data_ref;
7167 tree op;
7168 tree vec_oprnd = NULL_TREE;
7169 tree elem_type;
7170 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7171 class loop *loop = NULL;
7172 machine_mode vec_mode;
7173 tree dummy;
7174 enum vect_def_type rhs_dt = vect_unknown_def_type;
7175 enum vect_def_type mask_dt = vect_unknown_def_type;
7176 tree dataref_ptr = NULL_TREE;
7177 tree dataref_offset = NULL_TREE;
7178 gimple *ptr_incr = NULL;
7179 int ncopies;
7180 int j;
7181 stmt_vec_info first_stmt_info;
7182 bool grouped_store;
7183 unsigned int group_size, i;
7184 vec<tree> oprnds = vNULL;
7185 vec<tree> result_chain = vNULL;
7186 tree offset = NULL_TREE;
7187 vec<tree> vec_oprnds = vNULL;
7188 bool slp = (slp_node != NULL);
7189 unsigned int vec_num;
7190 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7191 tree aggr_type;
7192 gather_scatter_info gs_info;
7193 poly_uint64 vf;
7194 vec_load_store_type vls_type;
7195 tree ref_type;
7197 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7198 return false;
7200 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7201 && ! vec_stmt)
7202 return false;
7204 /* Is vectorizable store? */
7206 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7207 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7209 tree scalar_dest = gimple_assign_lhs (assign);
7210 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7211 && is_pattern_stmt_p (stmt_info))
7212 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7213 if (TREE_CODE (scalar_dest) != ARRAY_REF
7214 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7215 && TREE_CODE (scalar_dest) != INDIRECT_REF
7216 && TREE_CODE (scalar_dest) != COMPONENT_REF
7217 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7218 && TREE_CODE (scalar_dest) != REALPART_EXPR
7219 && TREE_CODE (scalar_dest) != MEM_REF)
7220 return false;
7222 else
7224 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7225 if (!call || !gimple_call_internal_p (call))
7226 return false;
7228 internal_fn ifn = gimple_call_internal_fn (call);
7229 if (!internal_store_fn_p (ifn))
7230 return false;
7232 if (slp_node != NULL)
7234 if (dump_enabled_p ())
7235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7236 "SLP of masked stores not supported.\n");
7237 return false;
7240 int mask_index = internal_fn_mask_index (ifn);
7241 if (mask_index >= 0
7242 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7243 &mask, NULL, &mask_dt, &mask_vectype))
7244 return false;
7247 op = vect_get_store_rhs (stmt_info);
7249 /* Cannot have hybrid store SLP -- that would mean storing to the
7250 same location twice. */
7251 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7253 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7254 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7256 if (loop_vinfo)
7258 loop = LOOP_VINFO_LOOP (loop_vinfo);
7259 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7261 else
7262 vf = 1;
7264 /* Multiple types in SLP are handled by creating the appropriate number of
7265 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7266 case of SLP. */
7267 if (slp)
7268 ncopies = 1;
7269 else
7270 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7272 gcc_assert (ncopies >= 1);
7274 /* FORNOW. This restriction should be relaxed. */
7275 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7277 if (dump_enabled_p ())
7278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7279 "multiple types in nested loop.\n");
7280 return false;
7283 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7284 op, &rhs_dt, &rhs_vectype, &vls_type))
7285 return false;
7287 elem_type = TREE_TYPE (vectype);
7288 vec_mode = TYPE_MODE (vectype);
7290 if (!STMT_VINFO_DATA_REF (stmt_info))
7291 return false;
7293 vect_memory_access_type memory_access_type;
7294 enum dr_alignment_support alignment_support_scheme;
7295 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7296 ncopies, &memory_access_type,
7297 &alignment_support_scheme, &gs_info))
7298 return false;
7300 if (mask)
7302 if (memory_access_type == VMAT_CONTIGUOUS)
7304 if (!VECTOR_MODE_P (vec_mode)
7305 || !can_vec_mask_load_store_p (vec_mode,
7306 TYPE_MODE (mask_vectype), false))
7307 return false;
7309 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7310 && (memory_access_type != VMAT_GATHER_SCATTER
7311 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7313 if (dump_enabled_p ())
7314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7315 "unsupported access type for masked store.\n");
7316 return false;
7319 else
7321 /* FORNOW. In some cases can vectorize even if data-type not supported
7322 (e.g. - array initialization with 0). */
7323 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7324 return false;
7327 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7328 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7329 && memory_access_type != VMAT_GATHER_SCATTER
7330 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7331 if (grouped_store)
7333 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7334 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7335 group_size = DR_GROUP_SIZE (first_stmt_info);
7337 else
7339 first_stmt_info = stmt_info;
7340 first_dr_info = dr_info;
7341 group_size = vec_num = 1;
7344 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7346 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7347 memory_access_type))
7348 return false;
7351 if (!vec_stmt) /* transformation not required. */
7353 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7355 if (loop_vinfo
7356 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7357 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7358 group_size, memory_access_type,
7359 &gs_info, mask);
7361 if (slp_node
7362 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7363 vectype))
7365 if (dump_enabled_p ())
7366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7367 "incompatible vector types for invariants\n");
7368 return false;
7371 if (dump_enabled_p ()
7372 && memory_access_type != VMAT_ELEMENTWISE
7373 && memory_access_type != VMAT_GATHER_SCATTER
7374 && alignment_support_scheme != dr_aligned)
7375 dump_printf_loc (MSG_NOTE, vect_location,
7376 "Vectorizing an unaligned access.\n");
7378 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7379 vect_model_store_cost (vinfo, stmt_info, ncopies,
7380 memory_access_type, vls_type, slp_node, cost_vec);
7381 return true;
7383 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7385 /* Transform. */
7387 ensure_base_align (dr_info);
7389 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7391 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7392 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7393 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7394 tree ptr, var, scale, vec_mask;
7395 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7396 tree mask_halfvectype = mask_vectype;
7397 edge pe = loop_preheader_edge (loop);
7398 gimple_seq seq;
7399 basic_block new_bb;
7400 enum { NARROW, NONE, WIDEN } modifier;
7401 poly_uint64 scatter_off_nunits
7402 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7404 if (known_eq (nunits, scatter_off_nunits))
7405 modifier = NONE;
7406 else if (known_eq (nunits * 2, scatter_off_nunits))
7408 modifier = WIDEN;
7410 /* Currently gathers and scatters are only supported for
7411 fixed-length vectors. */
7412 unsigned int count = scatter_off_nunits.to_constant ();
7413 vec_perm_builder sel (count, count, 1);
7414 for (i = 0; i < (unsigned int) count; ++i)
7415 sel.quick_push (i | (count / 2));
7417 vec_perm_indices indices (sel, 1, count);
7418 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7419 indices);
7420 gcc_assert (perm_mask != NULL_TREE);
7422 else if (known_eq (nunits, scatter_off_nunits * 2))
7424 modifier = NARROW;
7426 /* Currently gathers and scatters are only supported for
7427 fixed-length vectors. */
7428 unsigned int count = nunits.to_constant ();
7429 vec_perm_builder sel (count, count, 1);
7430 for (i = 0; i < (unsigned int) count; ++i)
7431 sel.quick_push (i | (count / 2));
7433 vec_perm_indices indices (sel, 2, count);
7434 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7435 gcc_assert (perm_mask != NULL_TREE);
7436 ncopies *= 2;
7438 if (mask)
7439 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7441 else
7442 gcc_unreachable ();
7444 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7445 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7446 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7447 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7448 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7449 scaletype = TREE_VALUE (arglist);
7451 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7452 && TREE_CODE (rettype) == VOID_TYPE);
7454 ptr = fold_convert (ptrtype, gs_info.base);
7455 if (!is_gimple_min_invariant (ptr))
7457 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7458 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7459 gcc_assert (!new_bb);
7462 if (mask == NULL_TREE)
7464 mask_arg = build_int_cst (masktype, -1);
7465 mask_arg = vect_init_vector (vinfo, stmt_info,
7466 mask_arg, masktype, NULL);
7469 scale = build_int_cst (scaletype, gs_info.scale);
7471 auto_vec<tree> vec_oprnds0;
7472 auto_vec<tree> vec_oprnds1;
7473 auto_vec<tree> vec_masks;
7474 if (mask)
7476 tree mask_vectype = truth_type_for (vectype);
7477 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7478 modifier == NARROW
7479 ? ncopies / 2 : ncopies,
7480 mask, &vec_masks, mask_vectype);
7482 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7483 modifier == WIDEN
7484 ? ncopies / 2 : ncopies,
7485 gs_info.offset, &vec_oprnds0);
7486 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7487 modifier == NARROW
7488 ? ncopies / 2 : ncopies,
7489 op, &vec_oprnds1);
7490 for (j = 0; j < ncopies; ++j)
7492 if (modifier == WIDEN)
7494 if (j & 1)
7495 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7496 perm_mask, stmt_info, gsi);
7497 else
7498 op = vec_oprnd0 = vec_oprnds0[j / 2];
7499 src = vec_oprnd1 = vec_oprnds1[j];
7500 if (mask)
7501 mask_op = vec_mask = vec_masks[j];
7503 else if (modifier == NARROW)
7505 if (j & 1)
7506 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7507 perm_mask, stmt_info, gsi);
7508 else
7509 src = vec_oprnd1 = vec_oprnds1[j / 2];
7510 op = vec_oprnd0 = vec_oprnds0[j];
7511 if (mask)
7512 mask_op = vec_mask = vec_masks[j / 2];
7514 else
7516 op = vec_oprnd0 = vec_oprnds0[j];
7517 src = vec_oprnd1 = vec_oprnds1[j];
7518 if (mask)
7519 mask_op = vec_mask = vec_masks[j];
7522 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7524 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7525 TYPE_VECTOR_SUBPARTS (srctype)));
7526 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7527 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7528 gassign *new_stmt
7529 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7530 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7531 src = var;
7534 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7536 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7537 TYPE_VECTOR_SUBPARTS (idxtype)));
7538 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7539 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7540 gassign *new_stmt
7541 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7542 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7543 op = var;
7546 if (mask)
7548 tree utype;
7549 mask_arg = mask_op;
7550 if (modifier == NARROW)
7552 var = vect_get_new_ssa_name (mask_halfvectype,
7553 vect_simple_var);
7554 gassign *new_stmt
7555 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7556 : VEC_UNPACK_LO_EXPR,
7557 mask_op);
7558 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7559 mask_arg = var;
7561 tree optype = TREE_TYPE (mask_arg);
7562 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7563 utype = masktype;
7564 else
7565 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7566 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7567 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7568 gassign *new_stmt
7569 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7570 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7571 mask_arg = var;
7572 if (!useless_type_conversion_p (masktype, utype))
7574 gcc_assert (TYPE_PRECISION (utype)
7575 <= TYPE_PRECISION (masktype));
7576 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7577 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7578 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7579 mask_arg = var;
7583 gcall *new_stmt
7584 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7585 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7587 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7589 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7590 return true;
7592 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7593 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7595 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7596 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7598 if (grouped_store)
7600 /* FORNOW */
7601 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7603 /* We vectorize all the stmts of the interleaving group when we
7604 reach the last stmt in the group. */
7605 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7606 < DR_GROUP_SIZE (first_stmt_info)
7607 && !slp)
7609 *vec_stmt = NULL;
7610 return true;
7613 if (slp)
7615 grouped_store = false;
7616 /* VEC_NUM is the number of vect stmts to be created for this
7617 group. */
7618 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7619 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7620 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7621 == first_stmt_info);
7622 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7623 op = vect_get_store_rhs (first_stmt_info);
7625 else
7626 /* VEC_NUM is the number of vect stmts to be created for this
7627 group. */
7628 vec_num = group_size;
7630 ref_type = get_group_alias_ptr_type (first_stmt_info);
7632 else
7633 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7635 if (dump_enabled_p ())
7636 dump_printf_loc (MSG_NOTE, vect_location,
7637 "transform store. ncopies = %d\n", ncopies);
7639 if (memory_access_type == VMAT_ELEMENTWISE
7640 || memory_access_type == VMAT_STRIDED_SLP)
7642 gimple_stmt_iterator incr_gsi;
7643 bool insert_after;
7644 gimple *incr;
7645 tree offvar;
7646 tree ivstep;
7647 tree running_off;
7648 tree stride_base, stride_step, alias_off;
7649 tree vec_oprnd;
7650 tree dr_offset;
7651 unsigned int g;
7652 /* Checked by get_load_store_type. */
7653 unsigned int const_nunits = nunits.to_constant ();
7655 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7656 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7658 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7659 stride_base
7660 = fold_build_pointer_plus
7661 (DR_BASE_ADDRESS (first_dr_info->dr),
7662 size_binop (PLUS_EXPR,
7663 convert_to_ptrofftype (dr_offset),
7664 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7665 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7667 /* For a store with loop-invariant (but other than power-of-2)
7668 stride (i.e. not a grouped access) like so:
7670 for (i = 0; i < n; i += stride)
7671 array[i] = ...;
7673 we generate a new induction variable and new stores from
7674 the components of the (vectorized) rhs:
7676 for (j = 0; ; j += VF*stride)
7677 vectemp = ...;
7678 tmp1 = vectemp[0];
7679 array[j] = tmp1;
7680 tmp2 = vectemp[1];
7681 array[j + stride] = tmp2;
7685 unsigned nstores = const_nunits;
7686 unsigned lnel = 1;
7687 tree ltype = elem_type;
7688 tree lvectype = vectype;
7689 if (slp)
7691 if (group_size < const_nunits
7692 && const_nunits % group_size == 0)
7694 nstores = const_nunits / group_size;
7695 lnel = group_size;
7696 ltype = build_vector_type (elem_type, group_size);
7697 lvectype = vectype;
7699 /* First check if vec_extract optab doesn't support extraction
7700 of vector elts directly. */
7701 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7702 machine_mode vmode;
7703 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7704 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7705 group_size).exists (&vmode)
7706 || (convert_optab_handler (vec_extract_optab,
7707 TYPE_MODE (vectype), vmode)
7708 == CODE_FOR_nothing))
7710 /* Try to avoid emitting an extract of vector elements
7711 by performing the extracts using an integer type of the
7712 same size, extracting from a vector of those and then
7713 re-interpreting it as the original vector type if
7714 supported. */
7715 unsigned lsize
7716 = group_size * GET_MODE_BITSIZE (elmode);
7717 unsigned int lnunits = const_nunits / group_size;
7718 /* If we can't construct such a vector fall back to
7719 element extracts from the original vector type and
7720 element size stores. */
7721 if (int_mode_for_size (lsize, 0).exists (&elmode)
7722 && VECTOR_MODE_P (TYPE_MODE (vectype))
7723 && related_vector_mode (TYPE_MODE (vectype), elmode,
7724 lnunits).exists (&vmode)
7725 && (convert_optab_handler (vec_extract_optab,
7726 vmode, elmode)
7727 != CODE_FOR_nothing))
7729 nstores = lnunits;
7730 lnel = group_size;
7731 ltype = build_nonstandard_integer_type (lsize, 1);
7732 lvectype = build_vector_type (ltype, nstores);
7734 /* Else fall back to vector extraction anyway.
7735 Fewer stores are more important than avoiding spilling
7736 of the vector we extract from. Compared to the
7737 construction case in vectorizable_load no store-forwarding
7738 issue exists here for reasonable archs. */
7741 else if (group_size >= const_nunits
7742 && group_size % const_nunits == 0)
7744 nstores = 1;
7745 lnel = const_nunits;
7746 ltype = vectype;
7747 lvectype = vectype;
7749 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7750 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7753 ivstep = stride_step;
7754 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7755 build_int_cst (TREE_TYPE (ivstep), vf));
7757 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7759 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7760 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7761 create_iv (stride_base, ivstep, NULL,
7762 loop, &incr_gsi, insert_after,
7763 &offvar, NULL);
7764 incr = gsi_stmt (incr_gsi);
7766 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7768 alias_off = build_int_cst (ref_type, 0);
7769 stmt_vec_info next_stmt_info = first_stmt_info;
7770 for (g = 0; g < group_size; g++)
7772 running_off = offvar;
7773 if (g)
7775 tree size = TYPE_SIZE_UNIT (ltype);
7776 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7777 size);
7778 tree newoff = copy_ssa_name (running_off, NULL);
7779 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7780 running_off, pos);
7781 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7782 running_off = newoff;
7784 if (!slp)
7785 op = vect_get_store_rhs (next_stmt_info);
7786 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7787 op, &vec_oprnds);
7788 unsigned int group_el = 0;
7789 unsigned HOST_WIDE_INT
7790 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7791 for (j = 0; j < ncopies; j++)
7793 vec_oprnd = vec_oprnds[j];
7794 /* Pun the vector to extract from if necessary. */
7795 if (lvectype != vectype)
7797 tree tem = make_ssa_name (lvectype);
7798 gimple *pun
7799 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7800 lvectype, vec_oprnd));
7801 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7802 vec_oprnd = tem;
7804 for (i = 0; i < nstores; i++)
7806 tree newref, newoff;
7807 gimple *incr, *assign;
7808 tree size = TYPE_SIZE (ltype);
7809 /* Extract the i'th component. */
7810 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7811 bitsize_int (i), size);
7812 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7813 size, pos);
7815 elem = force_gimple_operand_gsi (gsi, elem, true,
7816 NULL_TREE, true,
7817 GSI_SAME_STMT);
7819 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7820 group_el * elsz);
7821 newref = build2 (MEM_REF, ltype,
7822 running_off, this_off);
7823 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7825 /* And store it to *running_off. */
7826 assign = gimple_build_assign (newref, elem);
7827 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7829 group_el += lnel;
7830 if (! slp
7831 || group_el == group_size)
7833 newoff = copy_ssa_name (running_off, NULL);
7834 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7835 running_off, stride_step);
7836 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7838 running_off = newoff;
7839 group_el = 0;
7841 if (g == group_size - 1
7842 && !slp)
7844 if (j == 0 && i == 0)
7845 *vec_stmt = assign;
7846 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7850 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7851 vec_oprnds.release ();
7852 if (slp)
7853 break;
7856 return true;
7859 auto_vec<tree> dr_chain (group_size);
7860 oprnds.create (group_size);
7862 /* Gather-scatter accesses perform only component accesses, alignment
7863 is irrelevant for them. */
7864 if (memory_access_type == VMAT_GATHER_SCATTER)
7865 alignment_support_scheme = dr_unaligned_supported;
7866 else
7867 alignment_support_scheme
7868 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7870 gcc_assert (alignment_support_scheme);
7871 vec_loop_masks *loop_masks
7872 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7873 ? &LOOP_VINFO_MASKS (loop_vinfo)
7874 : NULL);
7875 vec_loop_lens *loop_lens
7876 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7877 ? &LOOP_VINFO_LENS (loop_vinfo)
7878 : NULL);
7880 /* Shouldn't go with length-based approach if fully masked. */
7881 gcc_assert (!loop_lens || !loop_masks);
7883 /* Targets with store-lane instructions must not require explicit
7884 realignment. vect_supportable_dr_alignment always returns either
7885 dr_aligned or dr_unaligned_supported for masked operations. */
7886 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7887 && !mask
7888 && !loop_masks)
7889 || alignment_support_scheme == dr_aligned
7890 || alignment_support_scheme == dr_unaligned_supported);
7892 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7893 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7894 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7896 tree bump;
7897 tree vec_offset = NULL_TREE;
7898 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7900 aggr_type = NULL_TREE;
7901 bump = NULL_TREE;
7903 else if (memory_access_type == VMAT_GATHER_SCATTER)
7905 aggr_type = elem_type;
7906 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7907 &bump, &vec_offset);
7909 else
7911 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7912 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7913 else
7914 aggr_type = vectype;
7915 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7916 memory_access_type);
7919 if (mask)
7920 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7922 /* In case the vectorization factor (VF) is bigger than the number
7923 of elements that we can fit in a vectype (nunits), we have to generate
7924 more than one vector stmt - i.e - we need to "unroll" the
7925 vector stmt by a factor VF/nunits. */
7927 /* In case of interleaving (non-unit grouped access):
7929 S1: &base + 2 = x2
7930 S2: &base = x0
7931 S3: &base + 1 = x1
7932 S4: &base + 3 = x3
7934 We create vectorized stores starting from base address (the access of the
7935 first stmt in the chain (S2 in the above example), when the last store stmt
7936 of the chain (S4) is reached:
7938 VS1: &base = vx2
7939 VS2: &base + vec_size*1 = vx0
7940 VS3: &base + vec_size*2 = vx1
7941 VS4: &base + vec_size*3 = vx3
7943 Then permutation statements are generated:
7945 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7946 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7949 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7950 (the order of the data-refs in the output of vect_permute_store_chain
7951 corresponds to the order of scalar stmts in the interleaving chain - see
7952 the documentation of vect_permute_store_chain()).
7954 In case of both multiple types and interleaving, above vector stores and
7955 permutation stmts are created for every copy. The result vector stmts are
7956 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7957 STMT_VINFO_RELATED_STMT for the next copies.
7960 auto_vec<tree> vec_masks;
7961 tree vec_mask = NULL;
7962 auto_vec<tree> vec_offsets;
7963 auto_vec<vec<tree> > gvec_oprnds;
7964 gvec_oprnds.safe_grow_cleared (group_size, true);
7965 for (j = 0; j < ncopies; j++)
7967 gimple *new_stmt;
7968 if (j == 0)
7970 if (slp)
7972 /* Get vectorized arguments for SLP_NODE. */
7973 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
7974 op, &vec_oprnds);
7975 vec_oprnd = vec_oprnds[0];
7977 else
7979 /* For interleaved stores we collect vectorized defs for all the
7980 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7981 used as an input to vect_permute_store_chain().
7983 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7984 and OPRNDS are of size 1. */
7985 stmt_vec_info next_stmt_info = first_stmt_info;
7986 for (i = 0; i < group_size; i++)
7988 /* Since gaps are not supported for interleaved stores,
7989 DR_GROUP_SIZE is the exact number of stmts in the chain.
7990 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
7991 that there is no interleaving, DR_GROUP_SIZE is 1,
7992 and only one iteration of the loop will be executed. */
7993 op = vect_get_store_rhs (next_stmt_info);
7994 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
7995 ncopies, op, &gvec_oprnds[i]);
7996 vec_oprnd = gvec_oprnds[i][0];
7997 dr_chain.quick_push (gvec_oprnds[i][0]);
7998 oprnds.quick_push (gvec_oprnds[i][0]);
7999 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8001 if (mask)
8003 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8004 mask, &vec_masks, mask_vectype);
8005 vec_mask = vec_masks[0];
8009 /* We should have catched mismatched types earlier. */
8010 gcc_assert (useless_type_conversion_p (vectype,
8011 TREE_TYPE (vec_oprnd)));
8012 bool simd_lane_access_p
8013 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8014 if (simd_lane_access_p
8015 && !loop_masks
8016 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8017 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8018 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8019 && integer_zerop (DR_INIT (first_dr_info->dr))
8020 && alias_sets_conflict_p (get_alias_set (aggr_type),
8021 get_alias_set (TREE_TYPE (ref_type))))
8023 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8024 dataref_offset = build_int_cst (ref_type, 0);
8026 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8028 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
8029 &dataref_ptr, &vec_offsets, ncopies);
8030 vec_offset = vec_offsets[0];
8032 else
8033 dataref_ptr
8034 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8035 simd_lane_access_p ? loop : NULL,
8036 offset, &dummy, gsi, &ptr_incr,
8037 simd_lane_access_p, NULL_TREE, bump);
8039 else
8041 /* For interleaved stores we created vectorized defs for all the
8042 defs stored in OPRNDS in the previous iteration (previous copy).
8043 DR_CHAIN is then used as an input to vect_permute_store_chain().
8044 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8045 OPRNDS are of size 1. */
8046 for (i = 0; i < group_size; i++)
8048 vec_oprnd = gvec_oprnds[i][j];
8049 dr_chain[i] = gvec_oprnds[i][j];
8050 oprnds[i] = gvec_oprnds[i][j];
8052 if (mask)
8053 vec_mask = vec_masks[j];
8054 if (dataref_offset)
8055 dataref_offset
8056 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8057 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8058 vec_offset = vec_offsets[j];
8059 else
8060 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8061 stmt_info, bump);
8064 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8066 tree vec_array;
8068 /* Get an array into which we can store the individual vectors. */
8069 vec_array = create_vector_array (vectype, vec_num);
8071 /* Invalidate the current contents of VEC_ARRAY. This should
8072 become an RTL clobber too, which prevents the vector registers
8073 from being upward-exposed. */
8074 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8076 /* Store the individual vectors into the array. */
8077 for (i = 0; i < vec_num; i++)
8079 vec_oprnd = dr_chain[i];
8080 write_vector_array (vinfo, stmt_info,
8081 gsi, vec_oprnd, vec_array, i);
8084 tree final_mask = NULL;
8085 if (loop_masks)
8086 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8087 vectype, j);
8088 if (vec_mask)
8089 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8090 vec_mask, gsi);
8092 gcall *call;
8093 if (final_mask)
8095 /* Emit:
8096 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8097 VEC_ARRAY). */
8098 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8099 tree alias_ptr = build_int_cst (ref_type, align);
8100 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8101 dataref_ptr, alias_ptr,
8102 final_mask, vec_array);
8104 else
8106 /* Emit:
8107 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8108 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8109 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8110 vec_array);
8111 gimple_call_set_lhs (call, data_ref);
8113 gimple_call_set_nothrow (call, true);
8114 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8115 new_stmt = call;
8117 /* Record that VEC_ARRAY is now dead. */
8118 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8120 else
8122 new_stmt = NULL;
8123 if (grouped_store)
8125 if (j == 0)
8126 result_chain.create (group_size);
8127 /* Permute. */
8128 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8129 gsi, &result_chain);
8132 stmt_vec_info next_stmt_info = first_stmt_info;
8133 for (i = 0; i < vec_num; i++)
8135 unsigned misalign;
8136 unsigned HOST_WIDE_INT align;
8138 tree final_mask = NULL_TREE;
8139 if (loop_masks)
8140 final_mask = vect_get_loop_mask (gsi, loop_masks,
8141 vec_num * ncopies,
8142 vectype, vec_num * j + i);
8143 if (vec_mask)
8144 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8145 vec_mask, gsi);
8147 if (memory_access_type == VMAT_GATHER_SCATTER)
8149 tree scale = size_int (gs_info.scale);
8150 gcall *call;
8151 if (final_mask)
8152 call = gimple_build_call_internal
8153 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8154 scale, vec_oprnd, final_mask);
8155 else
8156 call = gimple_build_call_internal
8157 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8158 scale, vec_oprnd);
8159 gimple_call_set_nothrow (call, true);
8160 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8161 new_stmt = call;
8162 break;
8165 if (i > 0)
8166 /* Bump the vector pointer. */
8167 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8168 gsi, stmt_info, bump);
8170 if (slp)
8171 vec_oprnd = vec_oprnds[i];
8172 else if (grouped_store)
8173 /* For grouped stores vectorized defs are interleaved in
8174 vect_permute_store_chain(). */
8175 vec_oprnd = result_chain[i];
8177 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8178 if (aligned_access_p (first_dr_info))
8179 misalign = 0;
8180 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8182 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8183 misalign = 0;
8185 else
8186 misalign = DR_MISALIGNMENT (first_dr_info);
8187 if (dataref_offset == NULL_TREE
8188 && TREE_CODE (dataref_ptr) == SSA_NAME)
8189 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8190 misalign);
8191 align = least_bit_hwi (misalign | align);
8193 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8195 tree perm_mask = perm_mask_for_reverse (vectype);
8196 tree perm_dest = vect_create_destination_var
8197 (vect_get_store_rhs (stmt_info), vectype);
8198 tree new_temp = make_ssa_name (perm_dest);
8200 /* Generate the permute statement. */
8201 gimple *perm_stmt
8202 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8203 vec_oprnd, perm_mask);
8204 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8206 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8207 vec_oprnd = new_temp;
8210 /* Arguments are ready. Create the new vector stmt. */
8211 if (final_mask)
8213 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8214 gcall *call
8215 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8216 dataref_ptr, ptr,
8217 final_mask, vec_oprnd);
8218 gimple_call_set_nothrow (call, true);
8219 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8220 new_stmt = call;
8222 else if (loop_lens)
8224 tree final_len
8225 = vect_get_loop_len (loop_vinfo, loop_lens,
8226 vec_num * ncopies, vec_num * j + i);
8227 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8228 machine_mode vmode = TYPE_MODE (vectype);
8229 opt_machine_mode new_ovmode
8230 = get_len_load_store_mode (vmode, false);
8231 machine_mode new_vmode = new_ovmode.require ();
8232 /* Need conversion if it's wrapped with VnQI. */
8233 if (vmode != new_vmode)
8235 tree new_vtype
8236 = build_vector_type_for_mode (unsigned_intQI_type_node,
8237 new_vmode);
8238 tree var
8239 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8240 vec_oprnd
8241 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8242 gassign *new_stmt
8243 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8244 vec_oprnd);
8245 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8246 gsi);
8247 vec_oprnd = var;
8249 gcall *call
8250 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8251 ptr, final_len, vec_oprnd);
8252 gimple_call_set_nothrow (call, true);
8253 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8254 new_stmt = call;
8256 else
8258 data_ref = fold_build2 (MEM_REF, vectype,
8259 dataref_ptr,
8260 dataref_offset
8261 ? dataref_offset
8262 : build_int_cst (ref_type, 0));
8263 if (aligned_access_p (first_dr_info))
8265 else
8266 TREE_TYPE (data_ref)
8267 = build_aligned_type (TREE_TYPE (data_ref),
8268 align * BITS_PER_UNIT);
8269 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8270 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8271 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8274 if (slp)
8275 continue;
8277 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8278 if (!next_stmt_info)
8279 break;
8282 if (!slp)
8284 if (j == 0)
8285 *vec_stmt = new_stmt;
8286 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8290 for (i = 0; i < group_size; ++i)
8292 vec<tree> oprndsi = gvec_oprnds[i];
8293 oprndsi.release ();
8295 oprnds.release ();
8296 result_chain.release ();
8297 vec_oprnds.release ();
8299 return true;
8302 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8303 VECTOR_CST mask. No checks are made that the target platform supports the
8304 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8305 vect_gen_perm_mask_checked. */
8307 tree
8308 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8310 tree mask_type;
8312 poly_uint64 nunits = sel.length ();
8313 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8315 mask_type = build_vector_type (ssizetype, nunits);
8316 return vec_perm_indices_to_tree (mask_type, sel);
8319 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8320 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8322 tree
8323 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8325 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8326 return vect_gen_perm_mask_any (vectype, sel);
8329 /* Given a vector variable X and Y, that was generated for the scalar
8330 STMT_INFO, generate instructions to permute the vector elements of X and Y
8331 using permutation mask MASK_VEC, insert them at *GSI and return the
8332 permuted vector variable. */
8334 static tree
8335 permute_vec_elements (vec_info *vinfo,
8336 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8337 gimple_stmt_iterator *gsi)
8339 tree vectype = TREE_TYPE (x);
8340 tree perm_dest, data_ref;
8341 gimple *perm_stmt;
8343 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8344 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8345 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8346 else
8347 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8348 data_ref = make_ssa_name (perm_dest);
8350 /* Generate the permute statement. */
8351 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8352 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8354 return data_ref;
8357 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8358 inserting them on the loops preheader edge. Returns true if we
8359 were successful in doing so (and thus STMT_INFO can be moved then),
8360 otherwise returns false. */
8362 static bool
8363 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8365 ssa_op_iter i;
8366 tree op;
8367 bool any = false;
8369 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8371 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8372 if (!gimple_nop_p (def_stmt)
8373 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8375 /* Make sure we don't need to recurse. While we could do
8376 so in simple cases when there are more complex use webs
8377 we don't have an easy way to preserve stmt order to fulfil
8378 dependencies within them. */
8379 tree op2;
8380 ssa_op_iter i2;
8381 if (gimple_code (def_stmt) == GIMPLE_PHI)
8382 return false;
8383 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8385 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8386 if (!gimple_nop_p (def_stmt2)
8387 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8388 return false;
8390 any = true;
8394 if (!any)
8395 return true;
8397 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8399 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8400 if (!gimple_nop_p (def_stmt)
8401 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8403 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8404 gsi_remove (&gsi, false);
8405 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8409 return true;
8412 /* vectorizable_load.
8414 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8415 that can be vectorized.
8416 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8417 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8418 Return true if STMT_INFO is vectorizable in this way. */
8420 static bool
8421 vectorizable_load (vec_info *vinfo,
8422 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8423 gimple **vec_stmt, slp_tree slp_node,
8424 stmt_vector_for_cost *cost_vec)
8426 tree scalar_dest;
8427 tree vec_dest = NULL;
8428 tree data_ref = NULL;
8429 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8430 class loop *loop = NULL;
8431 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8432 bool nested_in_vect_loop = false;
8433 tree elem_type;
8434 tree new_temp;
8435 machine_mode mode;
8436 tree dummy;
8437 tree dataref_ptr = NULL_TREE;
8438 tree dataref_offset = NULL_TREE;
8439 gimple *ptr_incr = NULL;
8440 int ncopies;
8441 int i, j;
8442 unsigned int group_size;
8443 poly_uint64 group_gap_adj;
8444 tree msq = NULL_TREE, lsq;
8445 tree offset = NULL_TREE;
8446 tree byte_offset = NULL_TREE;
8447 tree realignment_token = NULL_TREE;
8448 gphi *phi = NULL;
8449 vec<tree> dr_chain = vNULL;
8450 bool grouped_load = false;
8451 stmt_vec_info first_stmt_info;
8452 stmt_vec_info first_stmt_info_for_drptr = NULL;
8453 bool compute_in_loop = false;
8454 class loop *at_loop;
8455 int vec_num;
8456 bool slp = (slp_node != NULL);
8457 bool slp_perm = false;
8458 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8459 poly_uint64 vf;
8460 tree aggr_type;
8461 gather_scatter_info gs_info;
8462 tree ref_type;
8463 enum vect_def_type mask_dt = vect_unknown_def_type;
8465 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8466 return false;
8468 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8469 && ! vec_stmt)
8470 return false;
8472 if (!STMT_VINFO_DATA_REF (stmt_info))
8473 return false;
8475 /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8476 for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE
8477 which can be different when reduction chains were re-ordered.
8478 Now that we figured we're a dataref reset stmt_info back to
8479 SLP_TREE_SCALAR_STMTS[0]. When we're SLP only things should be
8480 refactored in a way to maintain the dr_vec_info pointer for the
8481 relevant access explicitely. */
8482 stmt_vec_info orig_stmt_info = stmt_info;
8483 if (slp_node)
8484 stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8486 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8487 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8489 scalar_dest = gimple_assign_lhs (assign);
8490 if (TREE_CODE (scalar_dest) != SSA_NAME)
8491 return false;
8493 tree_code code = gimple_assign_rhs_code (assign);
8494 if (code != ARRAY_REF
8495 && code != BIT_FIELD_REF
8496 && code != INDIRECT_REF
8497 && code != COMPONENT_REF
8498 && code != IMAGPART_EXPR
8499 && code != REALPART_EXPR
8500 && code != MEM_REF
8501 && TREE_CODE_CLASS (code) != tcc_declaration)
8502 return false;
8504 else
8506 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8507 if (!call || !gimple_call_internal_p (call))
8508 return false;
8510 internal_fn ifn = gimple_call_internal_fn (call);
8511 if (!internal_load_fn_p (ifn))
8512 return false;
8514 scalar_dest = gimple_call_lhs (call);
8515 if (!scalar_dest)
8516 return false;
8518 int mask_index = internal_fn_mask_index (ifn);
8519 if (mask_index >= 0
8520 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node,
8521 /* ??? For SLP we only have operands for
8522 the mask operand. */
8523 slp_node ? 0 : mask_index,
8524 &mask, NULL, &mask_dt, &mask_vectype))
8525 return false;
8528 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8529 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8531 if (loop_vinfo)
8533 loop = LOOP_VINFO_LOOP (loop_vinfo);
8534 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8535 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8537 else
8538 vf = 1;
8540 /* Multiple types in SLP are handled by creating the appropriate number of
8541 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8542 case of SLP. */
8543 if (slp)
8544 ncopies = 1;
8545 else
8546 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8548 gcc_assert (ncopies >= 1);
8550 /* FORNOW. This restriction should be relaxed. */
8551 if (nested_in_vect_loop && ncopies > 1)
8553 if (dump_enabled_p ())
8554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8555 "multiple types in nested loop.\n");
8556 return false;
8559 /* Invalidate assumptions made by dependence analysis when vectorization
8560 on the unrolled body effectively re-orders stmts. */
8561 if (ncopies > 1
8562 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8563 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8564 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8566 if (dump_enabled_p ())
8567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8568 "cannot perform implicit CSE when unrolling "
8569 "with negative dependence distance\n");
8570 return false;
8573 elem_type = TREE_TYPE (vectype);
8574 mode = TYPE_MODE (vectype);
8576 /* FORNOW. In some cases can vectorize even if data-type not supported
8577 (e.g. - data copies). */
8578 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8580 if (dump_enabled_p ())
8581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8582 "Aligned load, but unsupported type.\n");
8583 return false;
8586 /* Check if the load is a part of an interleaving chain. */
8587 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8589 grouped_load = true;
8590 /* FORNOW */
8591 gcc_assert (!nested_in_vect_loop);
8592 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8594 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8595 group_size = DR_GROUP_SIZE (first_stmt_info);
8597 /* Refuse non-SLP vectorization of SLP-only groups. */
8598 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8600 if (dump_enabled_p ())
8601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8602 "cannot vectorize load in non-SLP mode.\n");
8603 return false;
8606 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8608 slp_perm = true;
8610 if (!loop_vinfo)
8612 /* In BB vectorization we may not actually use a loaded vector
8613 accessing elements in excess of DR_GROUP_SIZE. */
8614 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8615 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8616 unsigned HOST_WIDE_INT nunits;
8617 unsigned j, k, maxk = 0;
8618 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8619 if (k > maxk)
8620 maxk = k;
8621 tree vectype = STMT_VINFO_VECTYPE (group_info);
8622 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8623 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8625 if (dump_enabled_p ())
8626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8627 "BB vectorization with gaps at the end of "
8628 "a load is not supported\n");
8629 return false;
8633 auto_vec<tree> tem;
8634 unsigned n_perms;
8635 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8636 true, &n_perms))
8638 if (dump_enabled_p ())
8639 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8640 vect_location,
8641 "unsupported load permutation\n");
8642 return false;
8646 /* Invalidate assumptions made by dependence analysis when vectorization
8647 on the unrolled body effectively re-orders stmts. */
8648 if (!PURE_SLP_STMT (stmt_info)
8649 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8650 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8651 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8653 if (dump_enabled_p ())
8654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8655 "cannot perform implicit CSE when performing "
8656 "group loads with negative dependence distance\n");
8657 return false;
8660 else
8661 group_size = 1;
8663 vect_memory_access_type memory_access_type;
8664 enum dr_alignment_support alignment_support_scheme;
8665 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8666 ncopies, &memory_access_type,
8667 &alignment_support_scheme, &gs_info))
8668 return false;
8670 if (mask)
8672 if (memory_access_type == VMAT_CONTIGUOUS)
8674 machine_mode vec_mode = TYPE_MODE (vectype);
8675 if (!VECTOR_MODE_P (vec_mode)
8676 || !can_vec_mask_load_store_p (vec_mode,
8677 TYPE_MODE (mask_vectype), true))
8678 return false;
8680 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8681 && memory_access_type != VMAT_GATHER_SCATTER)
8683 if (dump_enabled_p ())
8684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8685 "unsupported access type for masked load.\n");
8686 return false;
8690 if (!vec_stmt) /* transformation not required. */
8692 if (slp_node
8693 && mask
8694 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8695 mask_vectype))
8697 if (dump_enabled_p ())
8698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8699 "incompatible vector types for invariants\n");
8700 return false;
8703 if (!slp)
8704 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8706 if (loop_vinfo
8707 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8708 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8709 group_size, memory_access_type,
8710 &gs_info, mask);
8712 if (dump_enabled_p ()
8713 && memory_access_type != VMAT_ELEMENTWISE
8714 && memory_access_type != VMAT_GATHER_SCATTER
8715 && alignment_support_scheme != dr_aligned)
8716 dump_printf_loc (MSG_NOTE, vect_location,
8717 "Vectorizing an unaligned access.\n");
8719 STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8720 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8721 slp_node, cost_vec);
8722 return true;
8725 if (!slp)
8726 gcc_assert (memory_access_type
8727 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8729 if (dump_enabled_p ())
8730 dump_printf_loc (MSG_NOTE, vect_location,
8731 "transform load. ncopies = %d\n", ncopies);
8733 /* Transform. */
8735 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8736 ensure_base_align (dr_info);
8738 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8740 vect_build_gather_load_calls (vinfo,
8741 stmt_info, gsi, vec_stmt, &gs_info, mask);
8742 return true;
8745 if (memory_access_type == VMAT_INVARIANT)
8747 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8748 /* If we have versioned for aliasing or the loop doesn't
8749 have any data dependencies that would preclude this,
8750 then we are sure this is a loop invariant load and
8751 thus we can insert it on the preheader edge. */
8752 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8753 && !nested_in_vect_loop
8754 && hoist_defs_of_uses (stmt_info, loop));
8755 if (hoist_p)
8757 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8758 if (dump_enabled_p ())
8759 dump_printf_loc (MSG_NOTE, vect_location,
8760 "hoisting out of the vectorized loop: %G", stmt);
8761 scalar_dest = copy_ssa_name (scalar_dest);
8762 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8763 gsi_insert_on_edge_immediate
8764 (loop_preheader_edge (loop),
8765 gimple_build_assign (scalar_dest, rhs));
8767 /* These copies are all equivalent, but currently the representation
8768 requires a separate STMT_VINFO_VEC_STMT for each one. */
8769 gimple_stmt_iterator gsi2 = *gsi;
8770 gsi_next (&gsi2);
8771 for (j = 0; j < ncopies; j++)
8773 if (hoist_p)
8774 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8775 vectype, NULL);
8776 else
8777 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8778 vectype, &gsi2);
8779 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8780 if (slp)
8781 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8782 else
8784 if (j == 0)
8785 *vec_stmt = new_stmt;
8786 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8789 return true;
8792 if (memory_access_type == VMAT_ELEMENTWISE
8793 || memory_access_type == VMAT_STRIDED_SLP)
8795 gimple_stmt_iterator incr_gsi;
8796 bool insert_after;
8797 tree offvar;
8798 tree ivstep;
8799 tree running_off;
8800 vec<constructor_elt, va_gc> *v = NULL;
8801 tree stride_base, stride_step, alias_off;
8802 /* Checked by get_load_store_type. */
8803 unsigned int const_nunits = nunits.to_constant ();
8804 unsigned HOST_WIDE_INT cst_offset = 0;
8805 tree dr_offset;
8807 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8808 gcc_assert (!nested_in_vect_loop);
8810 if (grouped_load)
8812 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8813 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8815 else
8817 first_stmt_info = stmt_info;
8818 first_dr_info = dr_info;
8820 if (slp && grouped_load)
8822 group_size = DR_GROUP_SIZE (first_stmt_info);
8823 ref_type = get_group_alias_ptr_type (first_stmt_info);
8825 else
8827 if (grouped_load)
8828 cst_offset
8829 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8830 * vect_get_place_in_interleaving_chain (stmt_info,
8831 first_stmt_info));
8832 group_size = 1;
8833 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8836 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8837 stride_base
8838 = fold_build_pointer_plus
8839 (DR_BASE_ADDRESS (first_dr_info->dr),
8840 size_binop (PLUS_EXPR,
8841 convert_to_ptrofftype (dr_offset),
8842 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8843 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8845 /* For a load with loop-invariant (but other than power-of-2)
8846 stride (i.e. not a grouped access) like so:
8848 for (i = 0; i < n; i += stride)
8849 ... = array[i];
8851 we generate a new induction variable and new accesses to
8852 form a new vector (or vectors, depending on ncopies):
8854 for (j = 0; ; j += VF*stride)
8855 tmp1 = array[j];
8856 tmp2 = array[j + stride];
8858 vectemp = {tmp1, tmp2, ...}
8861 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8862 build_int_cst (TREE_TYPE (stride_step), vf));
8864 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8866 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8867 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8868 create_iv (stride_base, ivstep, NULL,
8869 loop, &incr_gsi, insert_after,
8870 &offvar, NULL);
8872 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8874 running_off = offvar;
8875 alias_off = build_int_cst (ref_type, 0);
8876 int nloads = const_nunits;
8877 int lnel = 1;
8878 tree ltype = TREE_TYPE (vectype);
8879 tree lvectype = vectype;
8880 auto_vec<tree> dr_chain;
8881 if (memory_access_type == VMAT_STRIDED_SLP)
8883 if (group_size < const_nunits)
8885 /* First check if vec_init optab supports construction from vector
8886 elts directly. Otherwise avoid emitting a constructor of
8887 vector elements by performing the loads using an integer type
8888 of the same size, constructing a vector of those and then
8889 re-interpreting it as the original vector type. This avoids a
8890 huge runtime penalty due to the general inability to perform
8891 store forwarding from smaller stores to a larger load. */
8892 tree ptype;
8893 tree vtype
8894 = vector_vector_composition_type (vectype,
8895 const_nunits / group_size,
8896 &ptype);
8897 if (vtype != NULL_TREE)
8899 nloads = const_nunits / group_size;
8900 lnel = group_size;
8901 lvectype = vtype;
8902 ltype = ptype;
8905 else
8907 nloads = 1;
8908 lnel = const_nunits;
8909 ltype = vectype;
8911 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8913 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
8914 else if (nloads == 1)
8915 ltype = vectype;
8917 if (slp)
8919 /* For SLP permutation support we need to load the whole group,
8920 not only the number of vector stmts the permutation result
8921 fits in. */
8922 if (slp_perm)
8924 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8925 variable VF. */
8926 unsigned int const_vf = vf.to_constant ();
8927 ncopies = CEIL (group_size * const_vf, const_nunits);
8928 dr_chain.create (ncopies);
8930 else
8931 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8933 unsigned int group_el = 0;
8934 unsigned HOST_WIDE_INT
8935 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8936 for (j = 0; j < ncopies; j++)
8938 if (nloads > 1)
8939 vec_alloc (v, nloads);
8940 gimple *new_stmt = NULL;
8941 for (i = 0; i < nloads; i++)
8943 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8944 group_el * elsz + cst_offset);
8945 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8946 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8947 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8948 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8949 if (nloads > 1)
8950 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8951 gimple_assign_lhs (new_stmt));
8953 group_el += lnel;
8954 if (! slp
8955 || group_el == group_size)
8957 tree newoff = copy_ssa_name (running_off);
8958 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8959 running_off, stride_step);
8960 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8962 running_off = newoff;
8963 group_el = 0;
8966 if (nloads > 1)
8968 tree vec_inv = build_constructor (lvectype, v);
8969 new_temp = vect_init_vector (vinfo, stmt_info,
8970 vec_inv, lvectype, gsi);
8971 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8972 if (lvectype != vectype)
8974 new_stmt = gimple_build_assign (make_ssa_name (vectype),
8975 VIEW_CONVERT_EXPR,
8976 build1 (VIEW_CONVERT_EXPR,
8977 vectype, new_temp));
8978 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8982 if (slp)
8984 if (slp_perm)
8985 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
8986 else
8987 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8989 else
8991 if (j == 0)
8992 *vec_stmt = new_stmt;
8993 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8996 if (slp_perm)
8998 unsigned n_perms;
8999 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9000 false, &n_perms);
9002 return true;
9005 if (memory_access_type == VMAT_GATHER_SCATTER
9006 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9007 grouped_load = false;
9009 if (grouped_load)
9011 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9012 group_size = DR_GROUP_SIZE (first_stmt_info);
9013 /* For SLP vectorization we directly vectorize a subchain
9014 without permutation. */
9015 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9016 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9017 /* For BB vectorization always use the first stmt to base
9018 the data ref pointer on. */
9019 if (bb_vinfo)
9020 first_stmt_info_for_drptr
9021 = vect_find_first_scalar_stmt_in_slp (slp_node);
9023 /* Check if the chain of loads is already vectorized. */
9024 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9025 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9026 ??? But we can only do so if there is exactly one
9027 as we have no way to get at the rest. Leave the CSE
9028 opportunity alone.
9029 ??? With the group load eventually participating
9030 in multiple different permutations (having multiple
9031 slp nodes which refer to the same group) the CSE
9032 is even wrong code. See PR56270. */
9033 && !slp)
9035 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9036 return true;
9038 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9039 group_gap_adj = 0;
9041 /* VEC_NUM is the number of vect stmts to be created for this group. */
9042 if (slp)
9044 grouped_load = false;
9045 /* If an SLP permutation is from N elements to N elements,
9046 and if one vector holds a whole number of N, we can load
9047 the inputs to the permutation in the same way as an
9048 unpermuted sequence. In other cases we need to load the
9049 whole group, not only the number of vector stmts the
9050 permutation result fits in. */
9051 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9052 if (slp_perm
9053 && (group_size != scalar_lanes
9054 || !multiple_p (nunits, group_size)))
9056 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9057 variable VF; see vect_transform_slp_perm_load. */
9058 unsigned int const_vf = vf.to_constant ();
9059 unsigned int const_nunits = nunits.to_constant ();
9060 vec_num = CEIL (group_size * const_vf, const_nunits);
9061 group_gap_adj = vf * group_size - nunits * vec_num;
9063 else
9065 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9066 group_gap_adj
9067 = group_size - scalar_lanes;
9070 else
9071 vec_num = group_size;
9073 ref_type = get_group_alias_ptr_type (first_stmt_info);
9075 else
9077 first_stmt_info = stmt_info;
9078 first_dr_info = dr_info;
9079 group_size = vec_num = 1;
9080 group_gap_adj = 0;
9081 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9084 gcc_assert (alignment_support_scheme);
9085 vec_loop_masks *loop_masks
9086 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9087 ? &LOOP_VINFO_MASKS (loop_vinfo)
9088 : NULL);
9089 vec_loop_lens *loop_lens
9090 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9091 ? &LOOP_VINFO_LENS (loop_vinfo)
9092 : NULL);
9094 /* Shouldn't go with length-based approach if fully masked. */
9095 gcc_assert (!loop_lens || !loop_masks);
9097 /* Targets with store-lane instructions must not require explicit
9098 realignment. vect_supportable_dr_alignment always returns either
9099 dr_aligned or dr_unaligned_supported for masked operations. */
9100 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9101 && !mask
9102 && !loop_masks)
9103 || alignment_support_scheme == dr_aligned
9104 || alignment_support_scheme == dr_unaligned_supported);
9106 /* In case the vectorization factor (VF) is bigger than the number
9107 of elements that we can fit in a vectype (nunits), we have to generate
9108 more than one vector stmt - i.e - we need to "unroll" the
9109 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9110 from one copy of the vector stmt to the next, in the field
9111 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9112 stages to find the correct vector defs to be used when vectorizing
9113 stmts that use the defs of the current stmt. The example below
9114 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9115 need to create 4 vectorized stmts):
9117 before vectorization:
9118 RELATED_STMT VEC_STMT
9119 S1: x = memref - -
9120 S2: z = x + 1 - -
9122 step 1: vectorize stmt S1:
9123 We first create the vector stmt VS1_0, and, as usual, record a
9124 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9125 Next, we create the vector stmt VS1_1, and record a pointer to
9126 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9127 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9128 stmts and pointers:
9129 RELATED_STMT VEC_STMT
9130 VS1_0: vx0 = memref0 VS1_1 -
9131 VS1_1: vx1 = memref1 VS1_2 -
9132 VS1_2: vx2 = memref2 VS1_3 -
9133 VS1_3: vx3 = memref3 - -
9134 S1: x = load - VS1_0
9135 S2: z = x + 1 - -
9138 /* In case of interleaving (non-unit grouped access):
9140 S1: x2 = &base + 2
9141 S2: x0 = &base
9142 S3: x1 = &base + 1
9143 S4: x3 = &base + 3
9145 Vectorized loads are created in the order of memory accesses
9146 starting from the access of the first stmt of the chain:
9148 VS1: vx0 = &base
9149 VS2: vx1 = &base + vec_size*1
9150 VS3: vx3 = &base + vec_size*2
9151 VS4: vx4 = &base + vec_size*3
9153 Then permutation statements are generated:
9155 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9156 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9159 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9160 (the order of the data-refs in the output of vect_permute_load_chain
9161 corresponds to the order of scalar stmts in the interleaving chain - see
9162 the documentation of vect_permute_load_chain()).
9163 The generation of permutation stmts and recording them in
9164 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9166 In case of both multiple types and interleaving, the vector loads and
9167 permutation stmts above are created for every copy. The result vector
9168 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9169 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9171 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9172 on a target that supports unaligned accesses (dr_unaligned_supported)
9173 we generate the following code:
9174 p = initial_addr;
9175 indx = 0;
9176 loop {
9177 p = p + indx * vectype_size;
9178 vec_dest = *(p);
9179 indx = indx + 1;
9182 Otherwise, the data reference is potentially unaligned on a target that
9183 does not support unaligned accesses (dr_explicit_realign_optimized) -
9184 then generate the following code, in which the data in each iteration is
9185 obtained by two vector loads, one from the previous iteration, and one
9186 from the current iteration:
9187 p1 = initial_addr;
9188 msq_init = *(floor(p1))
9189 p2 = initial_addr + VS - 1;
9190 realignment_token = call target_builtin;
9191 indx = 0;
9192 loop {
9193 p2 = p2 + indx * vectype_size
9194 lsq = *(floor(p2))
9195 vec_dest = realign_load (msq, lsq, realignment_token)
9196 indx = indx + 1;
9197 msq = lsq;
9198 } */
9200 /* If the misalignment remains the same throughout the execution of the
9201 loop, we can create the init_addr and permutation mask at the loop
9202 preheader. Otherwise, it needs to be created inside the loop.
9203 This can only occur when vectorizing memory accesses in the inner-loop
9204 nested within an outer-loop that is being vectorized. */
9206 if (nested_in_vect_loop
9207 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9208 GET_MODE_SIZE (TYPE_MODE (vectype))))
9210 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9211 compute_in_loop = true;
9214 bool diff_first_stmt_info
9215 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9217 if ((alignment_support_scheme == dr_explicit_realign_optimized
9218 || alignment_support_scheme == dr_explicit_realign)
9219 && !compute_in_loop)
9221 /* If we have different first_stmt_info, we can't set up realignment
9222 here, since we can't guarantee first_stmt_info DR has been
9223 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9224 distance from first_stmt_info DR instead as below. */
9225 if (!diff_first_stmt_info)
9226 msq = vect_setup_realignment (vinfo,
9227 first_stmt_info, gsi, &realignment_token,
9228 alignment_support_scheme, NULL_TREE,
9229 &at_loop);
9230 if (alignment_support_scheme == dr_explicit_realign_optimized)
9232 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9233 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9234 size_one_node);
9235 gcc_assert (!first_stmt_info_for_drptr);
9238 else
9239 at_loop = loop;
9241 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9242 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9244 tree bump;
9245 tree vec_offset = NULL_TREE;
9246 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9248 aggr_type = NULL_TREE;
9249 bump = NULL_TREE;
9251 else if (memory_access_type == VMAT_GATHER_SCATTER)
9253 aggr_type = elem_type;
9254 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9255 &bump, &vec_offset);
9257 else
9259 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9260 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9261 else
9262 aggr_type = vectype;
9263 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9264 memory_access_type);
9267 vec<tree> vec_offsets = vNULL;
9268 auto_vec<tree> vec_masks;
9269 if (mask)
9270 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9271 mask, &vec_masks, mask_vectype, NULL_TREE);
9272 tree vec_mask = NULL_TREE;
9273 poly_uint64 group_elt = 0;
9274 for (j = 0; j < ncopies; j++)
9276 /* 1. Create the vector or array pointer update chain. */
9277 if (j == 0)
9279 bool simd_lane_access_p
9280 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9281 if (simd_lane_access_p
9282 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9283 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9284 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9285 && integer_zerop (DR_INIT (first_dr_info->dr))
9286 && alias_sets_conflict_p (get_alias_set (aggr_type),
9287 get_alias_set (TREE_TYPE (ref_type)))
9288 && (alignment_support_scheme == dr_aligned
9289 || alignment_support_scheme == dr_unaligned_supported))
9291 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9292 dataref_offset = build_int_cst (ref_type, 0);
9294 else if (diff_first_stmt_info)
9296 dataref_ptr
9297 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9298 aggr_type, at_loop, offset, &dummy,
9299 gsi, &ptr_incr, simd_lane_access_p,
9300 byte_offset, bump);
9301 /* Adjust the pointer by the difference to first_stmt. */
9302 data_reference_p ptrdr
9303 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9304 tree diff
9305 = fold_convert (sizetype,
9306 size_binop (MINUS_EXPR,
9307 DR_INIT (first_dr_info->dr),
9308 DR_INIT (ptrdr)));
9309 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9310 stmt_info, diff);
9311 if (alignment_support_scheme == dr_explicit_realign)
9313 msq = vect_setup_realignment (vinfo,
9314 first_stmt_info_for_drptr, gsi,
9315 &realignment_token,
9316 alignment_support_scheme,
9317 dataref_ptr, &at_loop);
9318 gcc_assert (!compute_in_loop);
9321 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9323 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9324 &dataref_ptr, &vec_offsets, ncopies);
9325 vec_offset = vec_offsets[0];
9327 else
9328 dataref_ptr
9329 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9330 at_loop,
9331 offset, &dummy, gsi, &ptr_incr,
9332 simd_lane_access_p,
9333 byte_offset, bump);
9334 if (mask)
9335 vec_mask = vec_masks[0];
9337 else
9339 if (dataref_offset)
9340 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9341 bump);
9342 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9343 vec_offset = vec_offsets[j];
9344 else
9345 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9346 stmt_info, bump);
9347 if (mask)
9348 vec_mask = vec_masks[j];
9351 if (grouped_load || slp_perm)
9352 dr_chain.create (vec_num);
9354 gimple *new_stmt = NULL;
9355 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9357 tree vec_array;
9359 vec_array = create_vector_array (vectype, vec_num);
9361 tree final_mask = NULL_TREE;
9362 if (loop_masks)
9363 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9364 vectype, j);
9365 if (vec_mask)
9366 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9367 vec_mask, gsi);
9369 gcall *call;
9370 if (final_mask)
9372 /* Emit:
9373 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9374 VEC_MASK). */
9375 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9376 tree alias_ptr = build_int_cst (ref_type, align);
9377 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9378 dataref_ptr, alias_ptr,
9379 final_mask);
9381 else
9383 /* Emit:
9384 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9385 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9386 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9388 gimple_call_set_lhs (call, vec_array);
9389 gimple_call_set_nothrow (call, true);
9390 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9391 new_stmt = call;
9393 /* Extract each vector into an SSA_NAME. */
9394 for (i = 0; i < vec_num; i++)
9396 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9397 vec_array, i);
9398 dr_chain.quick_push (new_temp);
9401 /* Record the mapping between SSA_NAMEs and statements. */
9402 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9404 /* Record that VEC_ARRAY is now dead. */
9405 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9407 else
9409 for (i = 0; i < vec_num; i++)
9411 tree final_mask = NULL_TREE;
9412 if (loop_masks
9413 && memory_access_type != VMAT_INVARIANT)
9414 final_mask = vect_get_loop_mask (gsi, loop_masks,
9415 vec_num * ncopies,
9416 vectype, vec_num * j + i);
9417 if (vec_mask)
9418 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9419 vec_mask, gsi);
9421 if (i > 0)
9422 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9423 gsi, stmt_info, bump);
9425 /* 2. Create the vector-load in the loop. */
9426 switch (alignment_support_scheme)
9428 case dr_aligned:
9429 case dr_unaligned_supported:
9431 unsigned int misalign;
9432 unsigned HOST_WIDE_INT align;
9434 if (memory_access_type == VMAT_GATHER_SCATTER)
9436 tree zero = build_zero_cst (vectype);
9437 tree scale = size_int (gs_info.scale);
9438 gcall *call;
9439 if (final_mask)
9440 call = gimple_build_call_internal
9441 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9442 vec_offset, scale, zero, final_mask);
9443 else
9444 call = gimple_build_call_internal
9445 (IFN_GATHER_LOAD, 4, dataref_ptr,
9446 vec_offset, scale, zero);
9447 gimple_call_set_nothrow (call, true);
9448 new_stmt = call;
9449 data_ref = NULL_TREE;
9450 break;
9453 align =
9454 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9455 if (alignment_support_scheme == dr_aligned)
9457 gcc_assert (aligned_access_p (first_dr_info));
9458 misalign = 0;
9460 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9462 align = dr_alignment
9463 (vect_dr_behavior (vinfo, first_dr_info));
9464 misalign = 0;
9466 else
9467 misalign = DR_MISALIGNMENT (first_dr_info);
9468 if (dataref_offset == NULL_TREE
9469 && TREE_CODE (dataref_ptr) == SSA_NAME)
9470 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9471 align, misalign);
9472 align = least_bit_hwi (misalign | align);
9474 if (final_mask)
9476 tree ptr = build_int_cst (ref_type,
9477 align * BITS_PER_UNIT);
9478 gcall *call
9479 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9480 dataref_ptr, ptr,
9481 final_mask);
9482 gimple_call_set_nothrow (call, true);
9483 new_stmt = call;
9484 data_ref = NULL_TREE;
9486 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9488 tree final_len
9489 = vect_get_loop_len (loop_vinfo, loop_lens,
9490 vec_num * ncopies,
9491 vec_num * j + i);
9492 tree ptr = build_int_cst (ref_type,
9493 align * BITS_PER_UNIT);
9494 gcall *call
9495 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9496 dataref_ptr, ptr,
9497 final_len);
9498 gimple_call_set_nothrow (call, true);
9499 new_stmt = call;
9500 data_ref = NULL_TREE;
9502 /* Need conversion if it's wrapped with VnQI. */
9503 machine_mode vmode = TYPE_MODE (vectype);
9504 opt_machine_mode new_ovmode
9505 = get_len_load_store_mode (vmode, true);
9506 machine_mode new_vmode = new_ovmode.require ();
9507 if (vmode != new_vmode)
9509 tree qi_type = unsigned_intQI_type_node;
9510 tree new_vtype
9511 = build_vector_type_for_mode (qi_type, new_vmode);
9512 tree var = vect_get_new_ssa_name (new_vtype,
9513 vect_simple_var);
9514 gimple_set_lhs (call, var);
9515 vect_finish_stmt_generation (vinfo, stmt_info, call,
9516 gsi);
9517 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9518 new_stmt
9519 = gimple_build_assign (vec_dest,
9520 VIEW_CONVERT_EXPR, op);
9523 else
9525 tree ltype = vectype;
9526 tree new_vtype = NULL_TREE;
9527 unsigned HOST_WIDE_INT gap
9528 = DR_GROUP_GAP (first_stmt_info);
9529 unsigned int vect_align
9530 = vect_known_alignment_in_bytes (first_dr_info);
9531 unsigned int scalar_dr_size
9532 = vect_get_scalar_dr_size (first_dr_info);
9533 /* If there's no peeling for gaps but we have a gap
9534 with slp loads then load the lower half of the
9535 vector only. See get_group_load_store_type for
9536 when we apply this optimization. */
9537 if (slp
9538 && loop_vinfo
9539 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9540 && gap != 0
9541 && known_eq (nunits, (group_size - gap) * 2)
9542 && known_eq (nunits, group_size)
9543 && gap >= (vect_align / scalar_dr_size))
9545 tree half_vtype;
9546 new_vtype
9547 = vector_vector_composition_type (vectype, 2,
9548 &half_vtype);
9549 if (new_vtype != NULL_TREE)
9550 ltype = half_vtype;
9552 tree offset
9553 = (dataref_offset ? dataref_offset
9554 : build_int_cst (ref_type, 0));
9555 if (ltype != vectype
9556 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9558 unsigned HOST_WIDE_INT gap_offset
9559 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9560 tree gapcst = build_int_cst (ref_type, gap_offset);
9561 offset = size_binop (PLUS_EXPR, offset, gapcst);
9563 data_ref
9564 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9565 if (alignment_support_scheme == dr_aligned)
9567 else
9568 TREE_TYPE (data_ref)
9569 = build_aligned_type (TREE_TYPE (data_ref),
9570 align * BITS_PER_UNIT);
9571 if (ltype != vectype)
9573 vect_copy_ref_info (data_ref,
9574 DR_REF (first_dr_info->dr));
9575 tree tem = make_ssa_name (ltype);
9576 new_stmt = gimple_build_assign (tem, data_ref);
9577 vect_finish_stmt_generation (vinfo, stmt_info,
9578 new_stmt, gsi);
9579 data_ref = NULL;
9580 vec<constructor_elt, va_gc> *v;
9581 vec_alloc (v, 2);
9582 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9584 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9585 build_zero_cst (ltype));
9586 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9588 else
9590 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9591 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9592 build_zero_cst (ltype));
9594 gcc_assert (new_vtype != NULL_TREE);
9595 if (new_vtype == vectype)
9596 new_stmt = gimple_build_assign (
9597 vec_dest, build_constructor (vectype, v));
9598 else
9600 tree new_vname = make_ssa_name (new_vtype);
9601 new_stmt = gimple_build_assign (
9602 new_vname, build_constructor (new_vtype, v));
9603 vect_finish_stmt_generation (vinfo, stmt_info,
9604 new_stmt, gsi);
9605 new_stmt = gimple_build_assign (
9606 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9607 new_vname));
9611 break;
9613 case dr_explicit_realign:
9615 tree ptr, bump;
9617 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9619 if (compute_in_loop)
9620 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9621 &realignment_token,
9622 dr_explicit_realign,
9623 dataref_ptr, NULL);
9625 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9626 ptr = copy_ssa_name (dataref_ptr);
9627 else
9628 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9629 // For explicit realign the target alignment should be
9630 // known at compile time.
9631 unsigned HOST_WIDE_INT align =
9632 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9633 new_stmt = gimple_build_assign
9634 (ptr, BIT_AND_EXPR, dataref_ptr,
9635 build_int_cst
9636 (TREE_TYPE (dataref_ptr),
9637 -(HOST_WIDE_INT) align));
9638 vect_finish_stmt_generation (vinfo, stmt_info,
9639 new_stmt, gsi);
9640 data_ref
9641 = build2 (MEM_REF, vectype, ptr,
9642 build_int_cst (ref_type, 0));
9643 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9644 vec_dest = vect_create_destination_var (scalar_dest,
9645 vectype);
9646 new_stmt = gimple_build_assign (vec_dest, data_ref);
9647 new_temp = make_ssa_name (vec_dest, new_stmt);
9648 gimple_assign_set_lhs (new_stmt, new_temp);
9649 gimple_move_vops (new_stmt, stmt_info->stmt);
9650 vect_finish_stmt_generation (vinfo, stmt_info,
9651 new_stmt, gsi);
9652 msq = new_temp;
9654 bump = size_binop (MULT_EXPR, vs,
9655 TYPE_SIZE_UNIT (elem_type));
9656 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9657 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9658 stmt_info, bump);
9659 new_stmt = gimple_build_assign
9660 (NULL_TREE, BIT_AND_EXPR, ptr,
9661 build_int_cst
9662 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9663 ptr = copy_ssa_name (ptr, new_stmt);
9664 gimple_assign_set_lhs (new_stmt, ptr);
9665 vect_finish_stmt_generation (vinfo, stmt_info,
9666 new_stmt, gsi);
9667 data_ref
9668 = build2 (MEM_REF, vectype, ptr,
9669 build_int_cst (ref_type, 0));
9670 break;
9672 case dr_explicit_realign_optimized:
9674 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9675 new_temp = copy_ssa_name (dataref_ptr);
9676 else
9677 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9678 // We should only be doing this if we know the target
9679 // alignment at compile time.
9680 unsigned HOST_WIDE_INT align =
9681 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9682 new_stmt = gimple_build_assign
9683 (new_temp, BIT_AND_EXPR, dataref_ptr,
9684 build_int_cst (TREE_TYPE (dataref_ptr),
9685 -(HOST_WIDE_INT) align));
9686 vect_finish_stmt_generation (vinfo, stmt_info,
9687 new_stmt, gsi);
9688 data_ref
9689 = build2 (MEM_REF, vectype, new_temp,
9690 build_int_cst (ref_type, 0));
9691 break;
9693 default:
9694 gcc_unreachable ();
9696 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9697 /* DATA_REF is null if we've already built the statement. */
9698 if (data_ref)
9700 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9701 new_stmt = gimple_build_assign (vec_dest, data_ref);
9703 new_temp = make_ssa_name (vec_dest, new_stmt);
9704 gimple_set_lhs (new_stmt, new_temp);
9705 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9707 /* 3. Handle explicit realignment if necessary/supported.
9708 Create in loop:
9709 vec_dest = realign_load (msq, lsq, realignment_token) */
9710 if (alignment_support_scheme == dr_explicit_realign_optimized
9711 || alignment_support_scheme == dr_explicit_realign)
9713 lsq = gimple_assign_lhs (new_stmt);
9714 if (!realignment_token)
9715 realignment_token = dataref_ptr;
9716 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9717 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9718 msq, lsq, realignment_token);
9719 new_temp = make_ssa_name (vec_dest, new_stmt);
9720 gimple_assign_set_lhs (new_stmt, new_temp);
9721 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9723 if (alignment_support_scheme == dr_explicit_realign_optimized)
9725 gcc_assert (phi);
9726 if (i == vec_num - 1 && j == ncopies - 1)
9727 add_phi_arg (phi, lsq,
9728 loop_latch_edge (containing_loop),
9729 UNKNOWN_LOCATION);
9730 msq = lsq;
9734 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9736 tree perm_mask = perm_mask_for_reverse (vectype);
9737 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9738 perm_mask, stmt_info, gsi);
9739 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9742 /* Collect vector loads and later create their permutation in
9743 vect_transform_grouped_load (). */
9744 if (grouped_load || slp_perm)
9745 dr_chain.quick_push (new_temp);
9747 /* Store vector loads in the corresponding SLP_NODE. */
9748 if (slp && !slp_perm)
9749 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9751 /* With SLP permutation we load the gaps as well, without
9752 we need to skip the gaps after we manage to fully load
9753 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9754 group_elt += nunits;
9755 if (maybe_ne (group_gap_adj, 0U)
9756 && !slp_perm
9757 && known_eq (group_elt, group_size - group_gap_adj))
9759 poly_wide_int bump_val
9760 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9761 * group_gap_adj);
9762 if (tree_int_cst_sgn
9763 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9764 bump_val = -bump_val;
9765 tree bump = wide_int_to_tree (sizetype, bump_val);
9766 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9767 gsi, stmt_info, bump);
9768 group_elt = 0;
9771 /* Bump the vector pointer to account for a gap or for excess
9772 elements loaded for a permuted SLP load. */
9773 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9775 poly_wide_int bump_val
9776 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9777 * group_gap_adj);
9778 if (tree_int_cst_sgn
9779 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9780 bump_val = -bump_val;
9781 tree bump = wide_int_to_tree (sizetype, bump_val);
9782 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9783 stmt_info, bump);
9787 if (slp && !slp_perm)
9788 continue;
9790 if (slp_perm)
9792 unsigned n_perms;
9793 /* For SLP we know we've seen all possible uses of dr_chain so
9794 direct vect_transform_slp_perm_load to DCE the unused parts.
9795 ??? This is a hack to prevent compile-time issues as seen
9796 in PR101120 and friends. */
9797 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9798 gsi, vf, false, &n_perms,
9799 nullptr, true);
9800 gcc_assert (ok);
9802 else
9804 if (grouped_load)
9806 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9807 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9808 group_size, gsi);
9809 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9811 else
9813 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9816 dr_chain.release ();
9818 if (!slp)
9819 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9821 return true;
9824 /* Function vect_is_simple_cond.
9826 Input:
9827 LOOP - the loop that is being vectorized.
9828 COND - Condition that is checked for simple use.
9830 Output:
9831 *COMP_VECTYPE - the vector type for the comparison.
9832 *DTS - The def types for the arguments of the comparison
9834 Returns whether a COND can be vectorized. Checks whether
9835 condition operands are supportable using vec_is_simple_use. */
9837 static bool
9838 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9839 slp_tree slp_node, tree *comp_vectype,
9840 enum vect_def_type *dts, tree vectype)
9842 tree lhs, rhs;
9843 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9844 slp_tree slp_op;
9846 /* Mask case. */
9847 if (TREE_CODE (cond) == SSA_NAME
9848 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9850 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9851 &slp_op, &dts[0], comp_vectype)
9852 || !*comp_vectype
9853 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9854 return false;
9855 return true;
9858 if (!COMPARISON_CLASS_P (cond))
9859 return false;
9861 lhs = TREE_OPERAND (cond, 0);
9862 rhs = TREE_OPERAND (cond, 1);
9864 if (TREE_CODE (lhs) == SSA_NAME)
9866 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9867 &lhs, &slp_op, &dts[0], &vectype1))
9868 return false;
9870 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9871 || TREE_CODE (lhs) == FIXED_CST)
9872 dts[0] = vect_constant_def;
9873 else
9874 return false;
9876 if (TREE_CODE (rhs) == SSA_NAME)
9878 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9879 &rhs, &slp_op, &dts[1], &vectype2))
9880 return false;
9882 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9883 || TREE_CODE (rhs) == FIXED_CST)
9884 dts[1] = vect_constant_def;
9885 else
9886 return false;
9888 if (vectype1 && vectype2
9889 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9890 TYPE_VECTOR_SUBPARTS (vectype2)))
9891 return false;
9893 *comp_vectype = vectype1 ? vectype1 : vectype2;
9894 /* Invariant comparison. */
9895 if (! *comp_vectype)
9897 tree scalar_type = TREE_TYPE (lhs);
9898 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9899 *comp_vectype = truth_type_for (vectype);
9900 else
9902 /* If we can widen the comparison to match vectype do so. */
9903 if (INTEGRAL_TYPE_P (scalar_type)
9904 && !slp_node
9905 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9906 TYPE_SIZE (TREE_TYPE (vectype))))
9907 scalar_type = build_nonstandard_integer_type
9908 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
9909 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9910 slp_node);
9914 return true;
9917 /* vectorizable_condition.
9919 Check if STMT_INFO is conditional modify expression that can be vectorized.
9920 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9921 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9922 at GSI.
9924 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9926 Return true if STMT_INFO is vectorizable in this way. */
9928 static bool
9929 vectorizable_condition (vec_info *vinfo,
9930 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9931 gimple **vec_stmt,
9932 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9934 tree scalar_dest = NULL_TREE;
9935 tree vec_dest = NULL_TREE;
9936 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9937 tree then_clause, else_clause;
9938 tree comp_vectype = NULL_TREE;
9939 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9940 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9941 tree vec_compare;
9942 tree new_temp;
9943 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9944 enum vect_def_type dts[4]
9945 = {vect_unknown_def_type, vect_unknown_def_type,
9946 vect_unknown_def_type, vect_unknown_def_type};
9947 int ndts = 4;
9948 int ncopies;
9949 int vec_num;
9950 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9951 int i;
9952 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9953 vec<tree> vec_oprnds0 = vNULL;
9954 vec<tree> vec_oprnds1 = vNULL;
9955 vec<tree> vec_oprnds2 = vNULL;
9956 vec<tree> vec_oprnds3 = vNULL;
9957 tree vec_cmp_type;
9958 bool masked = false;
9960 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9961 return false;
9963 /* Is vectorizable conditional operation? */
9964 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9965 if (!stmt)
9966 return false;
9968 code = gimple_assign_rhs_code (stmt);
9969 if (code != COND_EXPR)
9970 return false;
9972 stmt_vec_info reduc_info = NULL;
9973 int reduc_index = -1;
9974 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9975 bool for_reduction
9976 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9977 if (for_reduction)
9979 if (STMT_SLP_TYPE (stmt_info))
9980 return false;
9981 reduc_info = info_for_reduction (vinfo, stmt_info);
9982 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9983 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9984 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9985 || reduc_index != -1);
9987 else
9989 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9990 return false;
9993 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9994 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9996 if (slp_node)
9998 ncopies = 1;
9999 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10001 else
10003 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10004 vec_num = 1;
10007 gcc_assert (ncopies >= 1);
10008 if (for_reduction && ncopies > 1)
10009 return false; /* FORNOW */
10011 cond_expr = gimple_assign_rhs1 (stmt);
10013 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10014 &comp_vectype, &dts[0], vectype)
10015 || !comp_vectype)
10016 return false;
10018 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10019 slp_tree then_slp_node, else_slp_node;
10020 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10021 &then_clause, &then_slp_node, &dts[2], &vectype1))
10022 return false;
10023 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10024 &else_clause, &else_slp_node, &dts[3], &vectype2))
10025 return false;
10027 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10028 return false;
10030 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10031 return false;
10033 masked = !COMPARISON_CLASS_P (cond_expr);
10034 vec_cmp_type = truth_type_for (comp_vectype);
10036 if (vec_cmp_type == NULL_TREE)
10037 return false;
10039 cond_code = TREE_CODE (cond_expr);
10040 if (!masked)
10042 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10043 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10046 /* For conditional reductions, the "then" value needs to be the candidate
10047 value calculated by this iteration while the "else" value needs to be
10048 the result carried over from previous iterations. If the COND_EXPR
10049 is the other way around, we need to swap it. */
10050 bool must_invert_cmp_result = false;
10051 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10053 if (masked)
10054 must_invert_cmp_result = true;
10055 else
10057 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10058 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10059 if (new_code == ERROR_MARK)
10060 must_invert_cmp_result = true;
10061 else
10063 cond_code = new_code;
10064 /* Make sure we don't accidentally use the old condition. */
10065 cond_expr = NULL_TREE;
10068 std::swap (then_clause, else_clause);
10071 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10073 /* Boolean values may have another representation in vectors
10074 and therefore we prefer bit operations over comparison for
10075 them (which also works for scalar masks). We store opcodes
10076 to use in bitop1 and bitop2. Statement is vectorized as
10077 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10078 depending on bitop1 and bitop2 arity. */
10079 switch (cond_code)
10081 case GT_EXPR:
10082 bitop1 = BIT_NOT_EXPR;
10083 bitop2 = BIT_AND_EXPR;
10084 break;
10085 case GE_EXPR:
10086 bitop1 = BIT_NOT_EXPR;
10087 bitop2 = BIT_IOR_EXPR;
10088 break;
10089 case LT_EXPR:
10090 bitop1 = BIT_NOT_EXPR;
10091 bitop2 = BIT_AND_EXPR;
10092 std::swap (cond_expr0, cond_expr1);
10093 break;
10094 case LE_EXPR:
10095 bitop1 = BIT_NOT_EXPR;
10096 bitop2 = BIT_IOR_EXPR;
10097 std::swap (cond_expr0, cond_expr1);
10098 break;
10099 case NE_EXPR:
10100 bitop1 = BIT_XOR_EXPR;
10101 break;
10102 case EQ_EXPR:
10103 bitop1 = BIT_XOR_EXPR;
10104 bitop2 = BIT_NOT_EXPR;
10105 break;
10106 default:
10107 return false;
10109 cond_code = SSA_NAME;
10112 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10113 && reduction_type == EXTRACT_LAST_REDUCTION
10114 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10116 if (dump_enabled_p ())
10117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10118 "reduction comparison operation not supported.\n");
10119 return false;
10122 if (!vec_stmt)
10124 if (bitop1 != NOP_EXPR)
10126 machine_mode mode = TYPE_MODE (comp_vectype);
10127 optab optab;
10129 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10130 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10131 return false;
10133 if (bitop2 != NOP_EXPR)
10135 optab = optab_for_tree_code (bitop2, comp_vectype,
10136 optab_default);
10137 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10138 return false;
10142 vect_cost_for_stmt kind = vector_stmt;
10143 if (reduction_type == EXTRACT_LAST_REDUCTION)
10144 /* Count one reduction-like operation per vector. */
10145 kind = vec_to_scalar;
10146 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10147 return false;
10149 if (slp_node
10150 && (!vect_maybe_update_slp_op_vectype
10151 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10152 || (op_adjust == 1
10153 && !vect_maybe_update_slp_op_vectype
10154 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10155 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10156 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10158 if (dump_enabled_p ())
10159 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10160 "incompatible vector types for invariants\n");
10161 return false;
10164 if (loop_vinfo && for_reduction
10165 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10167 if (reduction_type == EXTRACT_LAST_REDUCTION)
10168 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10169 ncopies * vec_num, vectype, NULL);
10170 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10171 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10173 if (dump_enabled_p ())
10174 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10175 "conditional reduction prevents the use"
10176 " of partial vectors.\n");
10177 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10181 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10182 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10183 cost_vec, kind);
10184 return true;
10187 /* Transform. */
10189 /* Handle def. */
10190 scalar_dest = gimple_assign_lhs (stmt);
10191 if (reduction_type != EXTRACT_LAST_REDUCTION)
10192 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10194 bool swap_cond_operands = false;
10196 /* See whether another part of the vectorized code applies a loop
10197 mask to the condition, or to its inverse. */
10199 vec_loop_masks *masks = NULL;
10200 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10202 if (reduction_type == EXTRACT_LAST_REDUCTION)
10203 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10204 else
10206 scalar_cond_masked_key cond (cond_expr, ncopies);
10207 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10208 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10209 else
10211 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10212 cond.code = invert_tree_comparison (cond.code, honor_nans);
10213 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10215 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10216 cond_code = cond.code;
10217 swap_cond_operands = true;
10223 /* Handle cond expr. */
10224 if (masked)
10225 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10226 cond_expr, &vec_oprnds0, comp_vectype,
10227 then_clause, &vec_oprnds2, vectype,
10228 reduction_type != EXTRACT_LAST_REDUCTION
10229 ? else_clause : NULL, &vec_oprnds3, vectype);
10230 else
10231 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10232 cond_expr0, &vec_oprnds0, comp_vectype,
10233 cond_expr1, &vec_oprnds1, comp_vectype,
10234 then_clause, &vec_oprnds2, vectype,
10235 reduction_type != EXTRACT_LAST_REDUCTION
10236 ? else_clause : NULL, &vec_oprnds3, vectype);
10238 /* Arguments are ready. Create the new vector stmt. */
10239 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10241 vec_then_clause = vec_oprnds2[i];
10242 if (reduction_type != EXTRACT_LAST_REDUCTION)
10243 vec_else_clause = vec_oprnds3[i];
10245 if (swap_cond_operands)
10246 std::swap (vec_then_clause, vec_else_clause);
10248 if (masked)
10249 vec_compare = vec_cond_lhs;
10250 else
10252 vec_cond_rhs = vec_oprnds1[i];
10253 if (bitop1 == NOP_EXPR)
10255 gimple_seq stmts = NULL;
10256 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10257 vec_cond_lhs, vec_cond_rhs);
10258 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10260 else
10262 new_temp = make_ssa_name (vec_cmp_type);
10263 gassign *new_stmt;
10264 if (bitop1 == BIT_NOT_EXPR)
10265 new_stmt = gimple_build_assign (new_temp, bitop1,
10266 vec_cond_rhs);
10267 else
10268 new_stmt
10269 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10270 vec_cond_rhs);
10271 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10272 if (bitop2 == NOP_EXPR)
10273 vec_compare = new_temp;
10274 else if (bitop2 == BIT_NOT_EXPR)
10276 /* Instead of doing ~x ? y : z do x ? z : y. */
10277 vec_compare = new_temp;
10278 std::swap (vec_then_clause, vec_else_clause);
10280 else
10282 vec_compare = make_ssa_name (vec_cmp_type);
10283 new_stmt
10284 = gimple_build_assign (vec_compare, bitop2,
10285 vec_cond_lhs, new_temp);
10286 vect_finish_stmt_generation (vinfo, stmt_info,
10287 new_stmt, gsi);
10292 /* If we decided to apply a loop mask to the result of the vector
10293 comparison, AND the comparison with the mask now. Later passes
10294 should then be able to reuse the AND results between mulitple
10295 vector statements.
10297 For example:
10298 for (int i = 0; i < 100; ++i)
10299 x[i] = y[i] ? z[i] : 10;
10301 results in following optimized GIMPLE:
10303 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10304 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10305 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10306 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10307 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10308 vect_iftmp.11_47, { 10, ... }>;
10310 instead of using a masked and unmasked forms of
10311 vec != { 0, ... } (masked in the MASK_LOAD,
10312 unmasked in the VEC_COND_EXPR). */
10314 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10315 in cases where that's necessary. */
10317 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10319 if (!is_gimple_val (vec_compare))
10321 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10322 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10323 vec_compare);
10324 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10325 vec_compare = vec_compare_name;
10328 if (must_invert_cmp_result)
10330 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10331 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10332 BIT_NOT_EXPR,
10333 vec_compare);
10334 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10335 vec_compare = vec_compare_name;
10338 if (masks)
10340 tree loop_mask
10341 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10342 vectype, i);
10343 tree tmp2 = make_ssa_name (vec_cmp_type);
10344 gassign *g
10345 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10346 loop_mask);
10347 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10348 vec_compare = tmp2;
10352 gimple *new_stmt;
10353 if (reduction_type == EXTRACT_LAST_REDUCTION)
10355 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10356 tree lhs = gimple_get_lhs (old_stmt);
10357 new_stmt = gimple_build_call_internal
10358 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10359 vec_then_clause);
10360 gimple_call_set_lhs (new_stmt, lhs);
10361 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10362 if (old_stmt == gsi_stmt (*gsi))
10363 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10364 else
10366 /* In this case we're moving the definition to later in the
10367 block. That doesn't matter because the only uses of the
10368 lhs are in phi statements. */
10369 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10370 gsi_remove (&old_gsi, true);
10371 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10374 else
10376 new_temp = make_ssa_name (vec_dest);
10377 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10378 vec_then_clause, vec_else_clause);
10379 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10381 if (slp_node)
10382 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10383 else
10384 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10387 if (!slp_node)
10388 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10390 vec_oprnds0.release ();
10391 vec_oprnds1.release ();
10392 vec_oprnds2.release ();
10393 vec_oprnds3.release ();
10395 return true;
10398 /* vectorizable_comparison.
10400 Check if STMT_INFO is comparison expression that can be vectorized.
10401 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10402 comparison, put it in VEC_STMT, and insert it at GSI.
10404 Return true if STMT_INFO is vectorizable in this way. */
10406 static bool
10407 vectorizable_comparison (vec_info *vinfo,
10408 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10409 gimple **vec_stmt,
10410 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10412 tree lhs, rhs1, rhs2;
10413 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10414 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10415 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10416 tree new_temp;
10417 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10418 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10419 int ndts = 2;
10420 poly_uint64 nunits;
10421 int ncopies;
10422 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10423 int i;
10424 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10425 vec<tree> vec_oprnds0 = vNULL;
10426 vec<tree> vec_oprnds1 = vNULL;
10427 tree mask_type;
10428 tree mask;
10430 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10431 return false;
10433 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10434 return false;
10436 mask_type = vectype;
10437 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10439 if (slp_node)
10440 ncopies = 1;
10441 else
10442 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10444 gcc_assert (ncopies >= 1);
10445 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10446 return false;
10448 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10449 if (!stmt)
10450 return false;
10452 code = gimple_assign_rhs_code (stmt);
10454 if (TREE_CODE_CLASS (code) != tcc_comparison)
10455 return false;
10457 slp_tree slp_rhs1, slp_rhs2;
10458 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10459 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10460 return false;
10462 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10463 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10464 return false;
10466 if (vectype1 && vectype2
10467 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10468 TYPE_VECTOR_SUBPARTS (vectype2)))
10469 return false;
10471 vectype = vectype1 ? vectype1 : vectype2;
10473 /* Invariant comparison. */
10474 if (!vectype)
10476 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10477 vectype = mask_type;
10478 else
10479 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10480 slp_node);
10481 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10482 return false;
10484 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10485 return false;
10487 /* Can't compare mask and non-mask types. */
10488 if (vectype1 && vectype2
10489 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10490 return false;
10492 /* Boolean values may have another representation in vectors
10493 and therefore we prefer bit operations over comparison for
10494 them (which also works for scalar masks). We store opcodes
10495 to use in bitop1 and bitop2. Statement is vectorized as
10496 BITOP2 (rhs1 BITOP1 rhs2) or
10497 rhs1 BITOP2 (BITOP1 rhs2)
10498 depending on bitop1 and bitop2 arity. */
10499 bool swap_p = false;
10500 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10502 if (code == GT_EXPR)
10504 bitop1 = BIT_NOT_EXPR;
10505 bitop2 = BIT_AND_EXPR;
10507 else if (code == GE_EXPR)
10509 bitop1 = BIT_NOT_EXPR;
10510 bitop2 = BIT_IOR_EXPR;
10512 else if (code == LT_EXPR)
10514 bitop1 = BIT_NOT_EXPR;
10515 bitop2 = BIT_AND_EXPR;
10516 swap_p = true;
10518 else if (code == LE_EXPR)
10520 bitop1 = BIT_NOT_EXPR;
10521 bitop2 = BIT_IOR_EXPR;
10522 swap_p = true;
10524 else
10526 bitop1 = BIT_XOR_EXPR;
10527 if (code == EQ_EXPR)
10528 bitop2 = BIT_NOT_EXPR;
10532 if (!vec_stmt)
10534 if (bitop1 == NOP_EXPR)
10536 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10537 return false;
10539 else
10541 machine_mode mode = TYPE_MODE (vectype);
10542 optab optab;
10544 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10545 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10546 return false;
10548 if (bitop2 != NOP_EXPR)
10550 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10551 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10552 return false;
10556 /* Put types on constant and invariant SLP children. */
10557 if (slp_node
10558 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10559 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10561 if (dump_enabled_p ())
10562 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10563 "incompatible vector types for invariants\n");
10564 return false;
10567 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10568 vect_model_simple_cost (vinfo, stmt_info,
10569 ncopies * (1 + (bitop2 != NOP_EXPR)),
10570 dts, ndts, slp_node, cost_vec);
10571 return true;
10574 /* Transform. */
10576 /* Handle def. */
10577 lhs = gimple_assign_lhs (stmt);
10578 mask = vect_create_destination_var (lhs, mask_type);
10580 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10581 rhs1, &vec_oprnds0, vectype,
10582 rhs2, &vec_oprnds1, vectype);
10583 if (swap_p)
10584 std::swap (vec_oprnds0, vec_oprnds1);
10586 /* Arguments are ready. Create the new vector stmt. */
10587 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10589 gimple *new_stmt;
10590 vec_rhs2 = vec_oprnds1[i];
10592 new_temp = make_ssa_name (mask);
10593 if (bitop1 == NOP_EXPR)
10595 new_stmt = gimple_build_assign (new_temp, code,
10596 vec_rhs1, vec_rhs2);
10597 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10599 else
10601 if (bitop1 == BIT_NOT_EXPR)
10602 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10603 else
10604 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10605 vec_rhs2);
10606 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10607 if (bitop2 != NOP_EXPR)
10609 tree res = make_ssa_name (mask);
10610 if (bitop2 == BIT_NOT_EXPR)
10611 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10612 else
10613 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10614 new_temp);
10615 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10618 if (slp_node)
10619 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10620 else
10621 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10624 if (!slp_node)
10625 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10627 vec_oprnds0.release ();
10628 vec_oprnds1.release ();
10630 return true;
10633 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10634 can handle all live statements in the node. Otherwise return true
10635 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10636 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10638 static bool
10639 can_vectorize_live_stmts (vec_info *vinfo,
10640 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10641 slp_tree slp_node, slp_instance slp_node_instance,
10642 bool vec_stmt_p,
10643 stmt_vector_for_cost *cost_vec)
10645 if (slp_node)
10647 stmt_vec_info slp_stmt_info;
10648 unsigned int i;
10649 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10651 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10652 && !vectorizable_live_operation (vinfo,
10653 slp_stmt_info, gsi, slp_node,
10654 slp_node_instance, i,
10655 vec_stmt_p, cost_vec))
10656 return false;
10659 else if (STMT_VINFO_LIVE_P (stmt_info)
10660 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10661 slp_node, slp_node_instance, -1,
10662 vec_stmt_p, cost_vec))
10663 return false;
10665 return true;
10668 /* Make sure the statement is vectorizable. */
10670 opt_result
10671 vect_analyze_stmt (vec_info *vinfo,
10672 stmt_vec_info stmt_info, bool *need_to_vectorize,
10673 slp_tree node, slp_instance node_instance,
10674 stmt_vector_for_cost *cost_vec)
10676 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10677 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10678 bool ok;
10679 gimple_seq pattern_def_seq;
10681 if (dump_enabled_p ())
10682 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10683 stmt_info->stmt);
10685 if (gimple_has_volatile_ops (stmt_info->stmt))
10686 return opt_result::failure_at (stmt_info->stmt,
10687 "not vectorized:"
10688 " stmt has volatile operands: %G\n",
10689 stmt_info->stmt);
10691 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10692 && node == NULL
10693 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10695 gimple_stmt_iterator si;
10697 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10699 stmt_vec_info pattern_def_stmt_info
10700 = vinfo->lookup_stmt (gsi_stmt (si));
10701 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10702 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10704 /* Analyze def stmt of STMT if it's a pattern stmt. */
10705 if (dump_enabled_p ())
10706 dump_printf_loc (MSG_NOTE, vect_location,
10707 "==> examining pattern def statement: %G",
10708 pattern_def_stmt_info->stmt);
10710 opt_result res
10711 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10712 need_to_vectorize, node, node_instance,
10713 cost_vec);
10714 if (!res)
10715 return res;
10720 /* Skip stmts that do not need to be vectorized. In loops this is expected
10721 to include:
10722 - the COND_EXPR which is the loop exit condition
10723 - any LABEL_EXPRs in the loop
10724 - computations that are used only for array indexing or loop control.
10725 In basic blocks we only analyze statements that are a part of some SLP
10726 instance, therefore, all the statements are relevant.
10728 Pattern statement needs to be analyzed instead of the original statement
10729 if the original statement is not relevant. Otherwise, we analyze both
10730 statements. In basic blocks we are called from some SLP instance
10731 traversal, don't analyze pattern stmts instead, the pattern stmts
10732 already will be part of SLP instance. */
10734 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10735 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10736 && !STMT_VINFO_LIVE_P (stmt_info))
10738 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10739 && pattern_stmt_info
10740 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10741 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10743 /* Analyze PATTERN_STMT instead of the original stmt. */
10744 stmt_info = pattern_stmt_info;
10745 if (dump_enabled_p ())
10746 dump_printf_loc (MSG_NOTE, vect_location,
10747 "==> examining pattern statement: %G",
10748 stmt_info->stmt);
10750 else
10752 if (dump_enabled_p ())
10753 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10755 return opt_result::success ();
10758 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10759 && node == NULL
10760 && pattern_stmt_info
10761 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10762 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10764 /* Analyze PATTERN_STMT too. */
10765 if (dump_enabled_p ())
10766 dump_printf_loc (MSG_NOTE, vect_location,
10767 "==> examining pattern statement: %G",
10768 pattern_stmt_info->stmt);
10770 opt_result res
10771 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10772 node_instance, cost_vec);
10773 if (!res)
10774 return res;
10777 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10779 case vect_internal_def:
10780 break;
10782 case vect_reduction_def:
10783 case vect_nested_cycle:
10784 gcc_assert (!bb_vinfo
10785 && (relevance == vect_used_in_outer
10786 || relevance == vect_used_in_outer_by_reduction
10787 || relevance == vect_used_by_reduction
10788 || relevance == vect_unused_in_scope
10789 || relevance == vect_used_only_live));
10790 break;
10792 case vect_induction_def:
10793 gcc_assert (!bb_vinfo);
10794 break;
10796 case vect_constant_def:
10797 case vect_external_def:
10798 case vect_unknown_def_type:
10799 default:
10800 gcc_unreachable ();
10803 if (STMT_VINFO_RELEVANT_P (stmt_info))
10805 tree type = gimple_expr_type (stmt_info->stmt);
10806 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10807 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10808 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10809 || (call && gimple_call_lhs (call) == NULL_TREE));
10810 *need_to_vectorize = true;
10813 if (PURE_SLP_STMT (stmt_info) && !node)
10815 if (dump_enabled_p ())
10816 dump_printf_loc (MSG_NOTE, vect_location,
10817 "handled only by SLP analysis\n");
10818 return opt_result::success ();
10821 ok = true;
10822 if (!bb_vinfo
10823 && (STMT_VINFO_RELEVANT_P (stmt_info)
10824 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10825 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10826 -mveclibabi= takes preference over library functions with
10827 the simd attribute. */
10828 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10829 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10830 cost_vec)
10831 || vectorizable_conversion (vinfo, stmt_info,
10832 NULL, NULL, node, cost_vec)
10833 || vectorizable_operation (vinfo, stmt_info,
10834 NULL, NULL, node, cost_vec)
10835 || vectorizable_assignment (vinfo, stmt_info,
10836 NULL, NULL, node, cost_vec)
10837 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10838 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10839 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10840 node, node_instance, cost_vec)
10841 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10842 NULL, node, cost_vec)
10843 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10844 || vectorizable_condition (vinfo, stmt_info,
10845 NULL, NULL, node, cost_vec)
10846 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10847 cost_vec)
10848 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10849 stmt_info, NULL, node));
10850 else
10852 if (bb_vinfo)
10853 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10854 || vectorizable_simd_clone_call (vinfo, stmt_info,
10855 NULL, NULL, node, cost_vec)
10856 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10857 cost_vec)
10858 || vectorizable_shift (vinfo, stmt_info,
10859 NULL, NULL, node, cost_vec)
10860 || vectorizable_operation (vinfo, stmt_info,
10861 NULL, NULL, node, cost_vec)
10862 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10863 cost_vec)
10864 || vectorizable_load (vinfo, stmt_info,
10865 NULL, NULL, node, cost_vec)
10866 || vectorizable_store (vinfo, stmt_info,
10867 NULL, NULL, node, cost_vec)
10868 || vectorizable_condition (vinfo, stmt_info,
10869 NULL, NULL, node, cost_vec)
10870 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10871 cost_vec)
10872 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
10875 if (!ok)
10876 return opt_result::failure_at (stmt_info->stmt,
10877 "not vectorized:"
10878 " relevant stmt not supported: %G",
10879 stmt_info->stmt);
10881 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
10882 need extra handling, except for vectorizable reductions. */
10883 if (!bb_vinfo
10884 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10885 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10886 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10887 stmt_info, NULL, node, node_instance,
10888 false, cost_vec))
10889 return opt_result::failure_at (stmt_info->stmt,
10890 "not vectorized:"
10891 " live stmt not supported: %G",
10892 stmt_info->stmt);
10894 return opt_result::success ();
10898 /* Function vect_transform_stmt.
10900 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10902 bool
10903 vect_transform_stmt (vec_info *vinfo,
10904 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10905 slp_tree slp_node, slp_instance slp_node_instance)
10907 bool is_store = false;
10908 gimple *vec_stmt = NULL;
10909 bool done;
10911 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10913 switch (STMT_VINFO_TYPE (stmt_info))
10915 case type_demotion_vec_info_type:
10916 case type_promotion_vec_info_type:
10917 case type_conversion_vec_info_type:
10918 done = vectorizable_conversion (vinfo, stmt_info,
10919 gsi, &vec_stmt, slp_node, NULL);
10920 gcc_assert (done);
10921 break;
10923 case induc_vec_info_type:
10924 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
10925 stmt_info, &vec_stmt, slp_node,
10926 NULL);
10927 gcc_assert (done);
10928 break;
10930 case shift_vec_info_type:
10931 done = vectorizable_shift (vinfo, stmt_info,
10932 gsi, &vec_stmt, slp_node, NULL);
10933 gcc_assert (done);
10934 break;
10936 case op_vec_info_type:
10937 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10938 NULL);
10939 gcc_assert (done);
10940 break;
10942 case assignment_vec_info_type:
10943 done = vectorizable_assignment (vinfo, stmt_info,
10944 gsi, &vec_stmt, slp_node, NULL);
10945 gcc_assert (done);
10946 break;
10948 case load_vec_info_type:
10949 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10950 NULL);
10951 gcc_assert (done);
10952 break;
10954 case store_vec_info_type:
10955 done = vectorizable_store (vinfo, stmt_info,
10956 gsi, &vec_stmt, slp_node, NULL);
10957 gcc_assert (done);
10958 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10960 /* In case of interleaving, the whole chain is vectorized when the
10961 last store in the chain is reached. Store stmts before the last
10962 one are skipped, and there vec_stmt_info shouldn't be freed
10963 meanwhile. */
10964 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10965 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10966 is_store = true;
10968 else
10969 is_store = true;
10970 break;
10972 case condition_vec_info_type:
10973 done = vectorizable_condition (vinfo, stmt_info,
10974 gsi, &vec_stmt, slp_node, NULL);
10975 gcc_assert (done);
10976 break;
10978 case comparison_vec_info_type:
10979 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
10980 slp_node, NULL);
10981 gcc_assert (done);
10982 break;
10984 case call_vec_info_type:
10985 done = vectorizable_call (vinfo, stmt_info,
10986 gsi, &vec_stmt, slp_node, NULL);
10987 break;
10989 case call_simd_clone_vec_info_type:
10990 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
10991 slp_node, NULL);
10992 break;
10994 case reduc_vec_info_type:
10995 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10996 gsi, &vec_stmt, slp_node);
10997 gcc_assert (done);
10998 break;
11000 case cycle_phi_info_type:
11001 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11002 &vec_stmt, slp_node, slp_node_instance);
11003 gcc_assert (done);
11004 break;
11006 case lc_phi_info_type:
11007 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11008 stmt_info, &vec_stmt, slp_node);
11009 gcc_assert (done);
11010 break;
11012 case phi_info_type:
11013 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11014 gcc_assert (done);
11015 break;
11017 default:
11018 if (!STMT_VINFO_LIVE_P (stmt_info))
11020 if (dump_enabled_p ())
11021 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11022 "stmt not supported.\n");
11023 gcc_unreachable ();
11025 done = true;
11028 if (!slp_node && vec_stmt)
11029 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11031 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11032 return is_store;
11034 /* Handle stmts whose DEF is used outside the loop-nest that is
11035 being vectorized. */
11036 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11037 slp_node_instance, true, NULL);
11038 gcc_assert (done);
11040 return false;
11044 /* Remove a group of stores (for SLP or interleaving), free their
11045 stmt_vec_info. */
11047 void
11048 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11050 stmt_vec_info next_stmt_info = first_stmt_info;
11052 while (next_stmt_info)
11054 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11055 next_stmt_info = vect_orig_stmt (next_stmt_info);
11056 /* Free the attached stmt_vec_info and remove the stmt. */
11057 vinfo->remove_stmt (next_stmt_info);
11058 next_stmt_info = tmp;
11062 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11063 elements of type SCALAR_TYPE, or null if the target doesn't support
11064 such a type.
11066 If NUNITS is zero, return a vector type that contains elements of
11067 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11069 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11070 for this vectorization region and want to "autodetect" the best choice.
11071 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11072 and we want the new type to be interoperable with it. PREVAILING_MODE
11073 in this case can be a scalar integer mode or a vector mode; when it
11074 is a vector mode, the function acts like a tree-level version of
11075 related_vector_mode. */
11077 tree
11078 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11079 tree scalar_type, poly_uint64 nunits)
11081 tree orig_scalar_type = scalar_type;
11082 scalar_mode inner_mode;
11083 machine_mode simd_mode;
11084 tree vectype;
11086 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11087 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11088 return NULL_TREE;
11090 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11092 /* For vector types of elements whose mode precision doesn't
11093 match their types precision we use a element type of mode
11094 precision. The vectorization routines will have to make sure
11095 they support the proper result truncation/extension.
11096 We also make sure to build vector types with INTEGER_TYPE
11097 component type only. */
11098 if (INTEGRAL_TYPE_P (scalar_type)
11099 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11100 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11101 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11102 TYPE_UNSIGNED (scalar_type));
11104 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11105 When the component mode passes the above test simply use a type
11106 corresponding to that mode. The theory is that any use that
11107 would cause problems with this will disable vectorization anyway. */
11108 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11109 && !INTEGRAL_TYPE_P (scalar_type))
11110 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11112 /* We can't build a vector type of elements with alignment bigger than
11113 their size. */
11114 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11115 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11116 TYPE_UNSIGNED (scalar_type));
11118 /* If we felt back to using the mode fail if there was
11119 no scalar type for it. */
11120 if (scalar_type == NULL_TREE)
11121 return NULL_TREE;
11123 /* If no prevailing mode was supplied, use the mode the target prefers.
11124 Otherwise lookup a vector mode based on the prevailing mode. */
11125 if (prevailing_mode == VOIDmode)
11127 gcc_assert (known_eq (nunits, 0U));
11128 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11129 if (SCALAR_INT_MODE_P (simd_mode))
11131 /* Traditional behavior is not to take the integer mode
11132 literally, but simply to use it as a way of determining
11133 the vector size. It is up to mode_for_vector to decide
11134 what the TYPE_MODE should be.
11136 Note that nunits == 1 is allowed in order to support single
11137 element vector types. */
11138 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11139 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11140 return NULL_TREE;
11143 else if (SCALAR_INT_MODE_P (prevailing_mode)
11144 || !related_vector_mode (prevailing_mode,
11145 inner_mode, nunits).exists (&simd_mode))
11147 /* Fall back to using mode_for_vector, mostly in the hope of being
11148 able to use an integer mode. */
11149 if (known_eq (nunits, 0U)
11150 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11151 return NULL_TREE;
11153 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11154 return NULL_TREE;
11157 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11159 /* In cases where the mode was chosen by mode_for_vector, check that
11160 the target actually supports the chosen mode, or that it at least
11161 allows the vector mode to be replaced by a like-sized integer. */
11162 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11163 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11164 return NULL_TREE;
11166 /* Re-attach the address-space qualifier if we canonicalized the scalar
11167 type. */
11168 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11169 return build_qualified_type
11170 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11172 return vectype;
11175 /* Function get_vectype_for_scalar_type.
11177 Returns the vector type corresponding to SCALAR_TYPE as supported
11178 by the target. If GROUP_SIZE is nonzero and we're performing BB
11179 vectorization, make sure that the number of elements in the vector
11180 is no bigger than GROUP_SIZE. */
11182 tree
11183 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11184 unsigned int group_size)
11186 /* For BB vectorization, we should always have a group size once we've
11187 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11188 are tentative requests during things like early data reference
11189 analysis and pattern recognition. */
11190 if (is_a <bb_vec_info> (vinfo))
11191 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11192 else
11193 group_size = 0;
11195 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11196 scalar_type);
11197 if (vectype && vinfo->vector_mode == VOIDmode)
11198 vinfo->vector_mode = TYPE_MODE (vectype);
11200 /* Register the natural choice of vector type, before the group size
11201 has been applied. */
11202 if (vectype)
11203 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11205 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11206 try again with an explicit number of elements. */
11207 if (vectype
11208 && group_size
11209 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11211 /* Start with the biggest number of units that fits within
11212 GROUP_SIZE and halve it until we find a valid vector type.
11213 Usually either the first attempt will succeed or all will
11214 fail (in the latter case because GROUP_SIZE is too small
11215 for the target), but it's possible that a target could have
11216 a hole between supported vector types.
11218 If GROUP_SIZE is not a power of 2, this has the effect of
11219 trying the largest power of 2 that fits within the group,
11220 even though the group is not a multiple of that vector size.
11221 The BB vectorizer will then try to carve up the group into
11222 smaller pieces. */
11223 unsigned int nunits = 1 << floor_log2 (group_size);
11226 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11227 scalar_type, nunits);
11228 nunits /= 2;
11230 while (nunits > 1 && !vectype);
11233 return vectype;
11236 /* Return the vector type corresponding to SCALAR_TYPE as supported
11237 by the target. NODE, if nonnull, is the SLP tree node that will
11238 use the returned vector type. */
11240 tree
11241 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11243 unsigned int group_size = 0;
11244 if (node)
11245 group_size = SLP_TREE_LANES (node);
11246 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11249 /* Function get_mask_type_for_scalar_type.
11251 Returns the mask type corresponding to a result of comparison
11252 of vectors of specified SCALAR_TYPE as supported by target.
11253 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11254 make sure that the number of elements in the vector is no bigger
11255 than GROUP_SIZE. */
11257 tree
11258 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11259 unsigned int group_size)
11261 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11263 if (!vectype)
11264 return NULL;
11266 return truth_type_for (vectype);
11269 /* Function get_same_sized_vectype
11271 Returns a vector type corresponding to SCALAR_TYPE of size
11272 VECTOR_TYPE if supported by the target. */
11274 tree
11275 get_same_sized_vectype (tree scalar_type, tree vector_type)
11277 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11278 return truth_type_for (vector_type);
11280 poly_uint64 nunits;
11281 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11282 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11283 return NULL_TREE;
11285 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11286 scalar_type, nunits);
11289 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11290 would not change the chosen vector modes. */
11292 bool
11293 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11295 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11296 i != vinfo->used_vector_modes.end (); ++i)
11297 if (!VECTOR_MODE_P (*i)
11298 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11299 return false;
11300 return true;
11303 /* Function vect_is_simple_use.
11305 Input:
11306 VINFO - the vect info of the loop or basic block that is being vectorized.
11307 OPERAND - operand in the loop or bb.
11308 Output:
11309 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11310 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11311 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11312 the definition could be anywhere in the function
11313 DT - the type of definition
11315 Returns whether a stmt with OPERAND can be vectorized.
11316 For loops, supportable operands are constants, loop invariants, and operands
11317 that are defined by the current iteration of the loop. Unsupportable
11318 operands are those that are defined by a previous iteration of the loop (as
11319 is the case in reduction/induction computations).
11320 For basic blocks, supportable operands are constants and bb invariants.
11321 For now, operands defined outside the basic block are not supported. */
11323 bool
11324 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11325 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11327 if (def_stmt_info_out)
11328 *def_stmt_info_out = NULL;
11329 if (def_stmt_out)
11330 *def_stmt_out = NULL;
11331 *dt = vect_unknown_def_type;
11333 if (dump_enabled_p ())
11335 dump_printf_loc (MSG_NOTE, vect_location,
11336 "vect_is_simple_use: operand ");
11337 if (TREE_CODE (operand) == SSA_NAME
11338 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11339 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11340 else
11341 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11344 if (CONSTANT_CLASS_P (operand))
11345 *dt = vect_constant_def;
11346 else if (is_gimple_min_invariant (operand))
11347 *dt = vect_external_def;
11348 else if (TREE_CODE (operand) != SSA_NAME)
11349 *dt = vect_unknown_def_type;
11350 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11351 *dt = vect_external_def;
11352 else
11354 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11355 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11356 if (!stmt_vinfo)
11357 *dt = vect_external_def;
11358 else
11360 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11361 def_stmt = stmt_vinfo->stmt;
11362 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11363 if (def_stmt_info_out)
11364 *def_stmt_info_out = stmt_vinfo;
11366 if (def_stmt_out)
11367 *def_stmt_out = def_stmt;
11370 if (dump_enabled_p ())
11372 dump_printf (MSG_NOTE, ", type of def: ");
11373 switch (*dt)
11375 case vect_uninitialized_def:
11376 dump_printf (MSG_NOTE, "uninitialized\n");
11377 break;
11378 case vect_constant_def:
11379 dump_printf (MSG_NOTE, "constant\n");
11380 break;
11381 case vect_external_def:
11382 dump_printf (MSG_NOTE, "external\n");
11383 break;
11384 case vect_internal_def:
11385 dump_printf (MSG_NOTE, "internal\n");
11386 break;
11387 case vect_induction_def:
11388 dump_printf (MSG_NOTE, "induction\n");
11389 break;
11390 case vect_reduction_def:
11391 dump_printf (MSG_NOTE, "reduction\n");
11392 break;
11393 case vect_double_reduction_def:
11394 dump_printf (MSG_NOTE, "double reduction\n");
11395 break;
11396 case vect_nested_cycle:
11397 dump_printf (MSG_NOTE, "nested cycle\n");
11398 break;
11399 case vect_unknown_def_type:
11400 dump_printf (MSG_NOTE, "unknown\n");
11401 break;
11405 if (*dt == vect_unknown_def_type)
11407 if (dump_enabled_p ())
11408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11409 "Unsupported pattern.\n");
11410 return false;
11413 return true;
11416 /* Function vect_is_simple_use.
11418 Same as vect_is_simple_use but also determines the vector operand
11419 type of OPERAND and stores it to *VECTYPE. If the definition of
11420 OPERAND is vect_uninitialized_def, vect_constant_def or
11421 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11422 is responsible to compute the best suited vector type for the
11423 scalar operand. */
11425 bool
11426 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11427 tree *vectype, stmt_vec_info *def_stmt_info_out,
11428 gimple **def_stmt_out)
11430 stmt_vec_info def_stmt_info;
11431 gimple *def_stmt;
11432 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11433 return false;
11435 if (def_stmt_out)
11436 *def_stmt_out = def_stmt;
11437 if (def_stmt_info_out)
11438 *def_stmt_info_out = def_stmt_info;
11440 /* Now get a vector type if the def is internal, otherwise supply
11441 NULL_TREE and leave it up to the caller to figure out a proper
11442 type for the use stmt. */
11443 if (*dt == vect_internal_def
11444 || *dt == vect_induction_def
11445 || *dt == vect_reduction_def
11446 || *dt == vect_double_reduction_def
11447 || *dt == vect_nested_cycle)
11449 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11450 gcc_assert (*vectype != NULL_TREE);
11451 if (dump_enabled_p ())
11452 dump_printf_loc (MSG_NOTE, vect_location,
11453 "vect_is_simple_use: vectype %T\n", *vectype);
11455 else if (*dt == vect_uninitialized_def
11456 || *dt == vect_constant_def
11457 || *dt == vect_external_def)
11458 *vectype = NULL_TREE;
11459 else
11460 gcc_unreachable ();
11462 return true;
11465 /* Function vect_is_simple_use.
11467 Same as vect_is_simple_use but determines the operand by operand
11468 position OPERAND from either STMT or SLP_NODE, filling in *OP
11469 and *SLP_DEF (when SLP_NODE is not NULL). */
11471 bool
11472 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11473 unsigned operand, tree *op, slp_tree *slp_def,
11474 enum vect_def_type *dt,
11475 tree *vectype, stmt_vec_info *def_stmt_info_out)
11477 if (slp_node)
11479 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11480 *slp_def = child;
11481 *vectype = SLP_TREE_VECTYPE (child);
11482 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11484 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11485 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11487 else
11489 if (def_stmt_info_out)
11490 *def_stmt_info_out = NULL;
11491 *op = SLP_TREE_SCALAR_OPS (child)[0];
11492 *dt = SLP_TREE_DEF_TYPE (child);
11493 return true;
11496 else
11498 *slp_def = NULL;
11499 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11501 if (gimple_assign_rhs_code (ass) == COND_EXPR
11502 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11504 if (operand < 2)
11505 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11506 else
11507 *op = gimple_op (ass, operand);
11509 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11510 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11511 else
11512 *op = gimple_op (ass, operand + 1);
11514 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11515 *op = gimple_call_arg (call, operand);
11516 else
11517 gcc_unreachable ();
11518 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11522 /* If OP is not NULL and is external or constant update its vector
11523 type with VECTYPE. Returns true if successful or false if not,
11524 for example when conflicting vector types are present. */
11526 bool
11527 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11529 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11530 return true;
11531 if (SLP_TREE_VECTYPE (op))
11532 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11533 SLP_TREE_VECTYPE (op) = vectype;
11534 return true;
11537 /* Function supportable_widening_operation
11539 Check whether an operation represented by the code CODE is a
11540 widening operation that is supported by the target platform in
11541 vector form (i.e., when operating on arguments of type VECTYPE_IN
11542 producing a result of type VECTYPE_OUT).
11544 Widening operations we currently support are NOP (CONVERT), FLOAT,
11545 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11546 are supported by the target platform either directly (via vector
11547 tree-codes), or via target builtins.
11549 Output:
11550 - CODE1 and CODE2 are codes of vector operations to be used when
11551 vectorizing the operation, if available.
11552 - MULTI_STEP_CVT determines the number of required intermediate steps in
11553 case of multi-step conversion (like char->short->int - in that case
11554 MULTI_STEP_CVT will be 1).
11555 - INTERM_TYPES contains the intermediate type required to perform the
11556 widening operation (short in the above example). */
11558 bool
11559 supportable_widening_operation (vec_info *vinfo,
11560 enum tree_code code, stmt_vec_info stmt_info,
11561 tree vectype_out, tree vectype_in,
11562 enum tree_code *code1, enum tree_code *code2,
11563 int *multi_step_cvt,
11564 vec<tree> *interm_types)
11566 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11567 class loop *vect_loop = NULL;
11568 machine_mode vec_mode;
11569 enum insn_code icode1, icode2;
11570 optab optab1, optab2;
11571 tree vectype = vectype_in;
11572 tree wide_vectype = vectype_out;
11573 enum tree_code c1, c2;
11574 int i;
11575 tree prev_type, intermediate_type;
11576 machine_mode intermediate_mode, prev_mode;
11577 optab optab3, optab4;
11579 *multi_step_cvt = 0;
11580 if (loop_info)
11581 vect_loop = LOOP_VINFO_LOOP (loop_info);
11583 switch (code)
11585 case WIDEN_MULT_EXPR:
11586 /* The result of a vectorized widening operation usually requires
11587 two vectors (because the widened results do not fit into one vector).
11588 The generated vector results would normally be expected to be
11589 generated in the same order as in the original scalar computation,
11590 i.e. if 8 results are generated in each vector iteration, they are
11591 to be organized as follows:
11592 vect1: [res1,res2,res3,res4],
11593 vect2: [res5,res6,res7,res8].
11595 However, in the special case that the result of the widening
11596 operation is used in a reduction computation only, the order doesn't
11597 matter (because when vectorizing a reduction we change the order of
11598 the computation). Some targets can take advantage of this and
11599 generate more efficient code. For example, targets like Altivec,
11600 that support widen_mult using a sequence of {mult_even,mult_odd}
11601 generate the following vectors:
11602 vect1: [res1,res3,res5,res7],
11603 vect2: [res2,res4,res6,res8].
11605 When vectorizing outer-loops, we execute the inner-loop sequentially
11606 (each vectorized inner-loop iteration contributes to VF outer-loop
11607 iterations in parallel). We therefore don't allow to change the
11608 order of the computation in the inner-loop during outer-loop
11609 vectorization. */
11610 /* TODO: Another case in which order doesn't *really* matter is when we
11611 widen and then contract again, e.g. (short)((int)x * y >> 8).
11612 Normally, pack_trunc performs an even/odd permute, whereas the
11613 repack from an even/odd expansion would be an interleave, which
11614 would be significantly simpler for e.g. AVX2. */
11615 /* In any case, in order to avoid duplicating the code below, recurse
11616 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11617 are properly set up for the caller. If we fail, we'll continue with
11618 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11619 if (vect_loop
11620 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11621 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11622 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11623 stmt_info, vectype_out,
11624 vectype_in, code1, code2,
11625 multi_step_cvt, interm_types))
11627 /* Elements in a vector with vect_used_by_reduction property cannot
11628 be reordered if the use chain with this property does not have the
11629 same operation. One such an example is s += a * b, where elements
11630 in a and b cannot be reordered. Here we check if the vector defined
11631 by STMT is only directly used in the reduction statement. */
11632 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11633 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11634 if (use_stmt_info
11635 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11636 return true;
11638 c1 = VEC_WIDEN_MULT_LO_EXPR;
11639 c2 = VEC_WIDEN_MULT_HI_EXPR;
11640 break;
11642 case DOT_PROD_EXPR:
11643 c1 = DOT_PROD_EXPR;
11644 c2 = DOT_PROD_EXPR;
11645 break;
11647 case SAD_EXPR:
11648 c1 = SAD_EXPR;
11649 c2 = SAD_EXPR;
11650 break;
11652 case VEC_WIDEN_MULT_EVEN_EXPR:
11653 /* Support the recursion induced just above. */
11654 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11655 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11656 break;
11658 case WIDEN_LSHIFT_EXPR:
11659 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11660 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11661 break;
11663 case WIDEN_PLUS_EXPR:
11664 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11665 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11666 break;
11668 case WIDEN_MINUS_EXPR:
11669 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11670 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11671 break;
11673 CASE_CONVERT:
11674 c1 = VEC_UNPACK_LO_EXPR;
11675 c2 = VEC_UNPACK_HI_EXPR;
11676 break;
11678 case FLOAT_EXPR:
11679 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11680 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11681 break;
11683 case FIX_TRUNC_EXPR:
11684 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11685 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11686 break;
11688 default:
11689 gcc_unreachable ();
11692 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11693 std::swap (c1, c2);
11695 if (code == FIX_TRUNC_EXPR)
11697 /* The signedness is determined from output operand. */
11698 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11699 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11701 else if (CONVERT_EXPR_CODE_P (code)
11702 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11703 && VECTOR_BOOLEAN_TYPE_P (vectype)
11704 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11705 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11707 /* If the input and result modes are the same, a different optab
11708 is needed where we pass in the number of units in vectype. */
11709 optab1 = vec_unpacks_sbool_lo_optab;
11710 optab2 = vec_unpacks_sbool_hi_optab;
11712 else
11714 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11715 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11718 if (!optab1 || !optab2)
11719 return false;
11721 vec_mode = TYPE_MODE (vectype);
11722 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11723 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11724 return false;
11726 *code1 = c1;
11727 *code2 = c2;
11729 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11730 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11732 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11733 return true;
11734 /* For scalar masks we may have different boolean
11735 vector types having the same QImode. Thus we
11736 add additional check for elements number. */
11737 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11738 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11739 return true;
11742 /* Check if it's a multi-step conversion that can be done using intermediate
11743 types. */
11745 prev_type = vectype;
11746 prev_mode = vec_mode;
11748 if (!CONVERT_EXPR_CODE_P (code))
11749 return false;
11751 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11752 intermediate steps in promotion sequence. We try
11753 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
11754 not. */
11755 interm_types->create (MAX_INTERM_CVT_STEPS);
11756 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11758 intermediate_mode = insn_data[icode1].operand[0].mode;
11759 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11760 intermediate_type
11761 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11762 else
11763 intermediate_type
11764 = lang_hooks.types.type_for_mode (intermediate_mode,
11765 TYPE_UNSIGNED (prev_type));
11767 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11768 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11769 && intermediate_mode == prev_mode
11770 && SCALAR_INT_MODE_P (prev_mode))
11772 /* If the input and result modes are the same, a different optab
11773 is needed where we pass in the number of units in vectype. */
11774 optab3 = vec_unpacks_sbool_lo_optab;
11775 optab4 = vec_unpacks_sbool_hi_optab;
11777 else
11779 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11780 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11783 if (!optab3 || !optab4
11784 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11785 || insn_data[icode1].operand[0].mode != intermediate_mode
11786 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11787 || insn_data[icode2].operand[0].mode != intermediate_mode
11788 || ((icode1 = optab_handler (optab3, intermediate_mode))
11789 == CODE_FOR_nothing)
11790 || ((icode2 = optab_handler (optab4, intermediate_mode))
11791 == CODE_FOR_nothing))
11792 break;
11794 interm_types->quick_push (intermediate_type);
11795 (*multi_step_cvt)++;
11797 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11798 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11800 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11801 return true;
11802 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11803 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11804 return true;
11807 prev_type = intermediate_type;
11808 prev_mode = intermediate_mode;
11811 interm_types->release ();
11812 return false;
11816 /* Function supportable_narrowing_operation
11818 Check whether an operation represented by the code CODE is a
11819 narrowing operation that is supported by the target platform in
11820 vector form (i.e., when operating on arguments of type VECTYPE_IN
11821 and producing a result of type VECTYPE_OUT).
11823 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11824 and FLOAT. This function checks if these operations are supported by
11825 the target platform directly via vector tree-codes.
11827 Output:
11828 - CODE1 is the code of a vector operation to be used when
11829 vectorizing the operation, if available.
11830 - MULTI_STEP_CVT determines the number of required intermediate steps in
11831 case of multi-step conversion (like int->short->char - in that case
11832 MULTI_STEP_CVT will be 1).
11833 - INTERM_TYPES contains the intermediate type required to perform the
11834 narrowing operation (short in the above example). */
11836 bool
11837 supportable_narrowing_operation (enum tree_code code,
11838 tree vectype_out, tree vectype_in,
11839 enum tree_code *code1, int *multi_step_cvt,
11840 vec<tree> *interm_types)
11842 machine_mode vec_mode;
11843 enum insn_code icode1;
11844 optab optab1, interm_optab;
11845 tree vectype = vectype_in;
11846 tree narrow_vectype = vectype_out;
11847 enum tree_code c1;
11848 tree intermediate_type, prev_type;
11849 machine_mode intermediate_mode, prev_mode;
11850 int i;
11851 bool uns;
11853 *multi_step_cvt = 0;
11854 switch (code)
11856 CASE_CONVERT:
11857 c1 = VEC_PACK_TRUNC_EXPR;
11858 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11859 && VECTOR_BOOLEAN_TYPE_P (vectype)
11860 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11861 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11862 optab1 = vec_pack_sbool_trunc_optab;
11863 else
11864 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11865 break;
11867 case FIX_TRUNC_EXPR:
11868 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11869 /* The signedness is determined from output operand. */
11870 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11871 break;
11873 case FLOAT_EXPR:
11874 c1 = VEC_PACK_FLOAT_EXPR;
11875 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11876 break;
11878 default:
11879 gcc_unreachable ();
11882 if (!optab1)
11883 return false;
11885 vec_mode = TYPE_MODE (vectype);
11886 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11887 return false;
11889 *code1 = c1;
11891 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11893 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11894 return true;
11895 /* For scalar masks we may have different boolean
11896 vector types having the same QImode. Thus we
11897 add additional check for elements number. */
11898 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11899 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11900 return true;
11903 if (code == FLOAT_EXPR)
11904 return false;
11906 /* Check if it's a multi-step conversion that can be done using intermediate
11907 types. */
11908 prev_mode = vec_mode;
11909 prev_type = vectype;
11910 if (code == FIX_TRUNC_EXPR)
11911 uns = TYPE_UNSIGNED (vectype_out);
11912 else
11913 uns = TYPE_UNSIGNED (vectype);
11915 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11916 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11917 costly than signed. */
11918 if (code == FIX_TRUNC_EXPR && uns)
11920 enum insn_code icode2;
11922 intermediate_type
11923 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11924 interm_optab
11925 = optab_for_tree_code (c1, intermediate_type, optab_default);
11926 if (interm_optab != unknown_optab
11927 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11928 && insn_data[icode1].operand[0].mode
11929 == insn_data[icode2].operand[0].mode)
11931 uns = false;
11932 optab1 = interm_optab;
11933 icode1 = icode2;
11937 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11938 intermediate steps in promotion sequence. We try
11939 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11940 interm_types->create (MAX_INTERM_CVT_STEPS);
11941 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11943 intermediate_mode = insn_data[icode1].operand[0].mode;
11944 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11945 intermediate_type
11946 = vect_double_mask_nunits (prev_type, intermediate_mode);
11947 else
11948 intermediate_type
11949 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11950 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11951 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11952 && intermediate_mode == prev_mode
11953 && SCALAR_INT_MODE_P (prev_mode))
11954 interm_optab = vec_pack_sbool_trunc_optab;
11955 else
11956 interm_optab
11957 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11958 optab_default);
11959 if (!interm_optab
11960 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11961 || insn_data[icode1].operand[0].mode != intermediate_mode
11962 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11963 == CODE_FOR_nothing))
11964 break;
11966 interm_types->quick_push (intermediate_type);
11967 (*multi_step_cvt)++;
11969 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11971 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11972 return true;
11973 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11974 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11975 return true;
11978 prev_mode = intermediate_mode;
11979 prev_type = intermediate_type;
11980 optab1 = interm_optab;
11983 interm_types->release ();
11984 return false;
11987 /* Generate and return a vector mask of MASK_TYPE such that
11988 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
11989 Add the statements to SEQ. */
11991 tree
11992 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
11993 tree end_index, const char *name)
11995 tree cmp_type = TREE_TYPE (start_index);
11996 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11997 cmp_type, mask_type,
11998 OPTIMIZE_FOR_SPEED));
11999 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12000 start_index, end_index,
12001 build_zero_cst (mask_type));
12002 tree tmp;
12003 if (name)
12004 tmp = make_temp_ssa_name (mask_type, NULL, name);
12005 else
12006 tmp = make_ssa_name (mask_type);
12007 gimple_call_set_lhs (call, tmp);
12008 gimple_seq_add_stmt (seq, call);
12009 return tmp;
12012 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12013 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12015 tree
12016 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12017 tree end_index)
12019 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12020 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12023 /* Try to compute the vector types required to vectorize STMT_INFO,
12024 returning true on success and false if vectorization isn't possible.
12025 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12026 take sure that the number of elements in the vectors is no bigger
12027 than GROUP_SIZE.
12029 On success:
12031 - Set *STMT_VECTYPE_OUT to:
12032 - NULL_TREE if the statement doesn't need to be vectorized;
12033 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12035 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12036 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12037 statement does not help to determine the overall number of units. */
12039 opt_result
12040 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12041 tree *stmt_vectype_out,
12042 tree *nunits_vectype_out,
12043 unsigned int group_size)
12045 gimple *stmt = stmt_info->stmt;
12047 /* For BB vectorization, we should always have a group size once we've
12048 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12049 are tentative requests during things like early data reference
12050 analysis and pattern recognition. */
12051 if (is_a <bb_vec_info> (vinfo))
12052 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12053 else
12054 group_size = 0;
12056 *stmt_vectype_out = NULL_TREE;
12057 *nunits_vectype_out = NULL_TREE;
12059 if (gimple_get_lhs (stmt) == NULL_TREE
12060 /* MASK_STORE has no lhs, but is ok. */
12061 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12063 if (is_a <gcall *> (stmt))
12065 /* Ignore calls with no lhs. These must be calls to
12066 #pragma omp simd functions, and what vectorization factor
12067 it really needs can't be determined until
12068 vectorizable_simd_clone_call. */
12069 if (dump_enabled_p ())
12070 dump_printf_loc (MSG_NOTE, vect_location,
12071 "defer to SIMD clone analysis.\n");
12072 return opt_result::success ();
12075 return opt_result::failure_at (stmt,
12076 "not vectorized: irregular stmt.%G", stmt);
12079 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12080 return opt_result::failure_at (stmt,
12081 "not vectorized: vector stmt in loop:%G",
12082 stmt);
12084 tree vectype;
12085 tree scalar_type = NULL_TREE;
12086 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12088 vectype = STMT_VINFO_VECTYPE (stmt_info);
12089 if (dump_enabled_p ())
12090 dump_printf_loc (MSG_NOTE, vect_location,
12091 "precomputed vectype: %T\n", vectype);
12093 else if (vect_use_mask_type_p (stmt_info))
12095 unsigned int precision = stmt_info->mask_precision;
12096 scalar_type = build_nonstandard_integer_type (precision, 1);
12097 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12098 if (!vectype)
12099 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12100 " data-type %T\n", scalar_type);
12101 if (dump_enabled_p ())
12102 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12104 else
12106 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12107 scalar_type = TREE_TYPE (DR_REF (dr));
12108 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12109 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12110 else
12111 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12113 if (dump_enabled_p ())
12115 if (group_size)
12116 dump_printf_loc (MSG_NOTE, vect_location,
12117 "get vectype for scalar type (group size %d):"
12118 " %T\n", group_size, scalar_type);
12119 else
12120 dump_printf_loc (MSG_NOTE, vect_location,
12121 "get vectype for scalar type: %T\n", scalar_type);
12123 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12124 if (!vectype)
12125 return opt_result::failure_at (stmt,
12126 "not vectorized:"
12127 " unsupported data-type %T\n",
12128 scalar_type);
12130 if (dump_enabled_p ())
12131 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12133 *stmt_vectype_out = vectype;
12135 /* Don't try to compute scalar types if the stmt produces a boolean
12136 vector; use the existing vector type instead. */
12137 tree nunits_vectype = vectype;
12138 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12140 /* The number of units is set according to the smallest scalar
12141 type (or the largest vector size, but we only support one
12142 vector size per vectorization). */
12143 HOST_WIDE_INT dummy;
12144 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12145 if (scalar_type != TREE_TYPE (vectype))
12147 if (dump_enabled_p ())
12148 dump_printf_loc (MSG_NOTE, vect_location,
12149 "get vectype for smallest scalar type: %T\n",
12150 scalar_type);
12151 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12152 group_size);
12153 if (!nunits_vectype)
12154 return opt_result::failure_at
12155 (stmt, "not vectorized: unsupported data-type %T\n",
12156 scalar_type);
12157 if (dump_enabled_p ())
12158 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12159 nunits_vectype);
12163 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12164 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12165 return opt_result::failure_at (stmt,
12166 "Not vectorized: Incompatible number "
12167 "of vector subparts between %T and %T\n",
12168 nunits_vectype, *stmt_vectype_out);
12170 if (dump_enabled_p ())
12172 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12173 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12174 dump_printf (MSG_NOTE, "\n");
12177 *nunits_vectype_out = nunits_vectype;
12178 return opt_result::success ();
12181 /* Generate and return statement sequence that sets vector length LEN that is:
12183 min_of_start_and_end = min (START_INDEX, END_INDEX);
12184 left_len = END_INDEX - min_of_start_and_end;
12185 rhs = min (left_len, LEN_LIMIT);
12186 LEN = rhs;
12188 Note: the cost of the code generated by this function is modeled
12189 by vect_estimate_min_profitable_iters, so changes here may need
12190 corresponding changes there. */
12192 gimple_seq
12193 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12195 gimple_seq stmts = NULL;
12196 tree len_type = TREE_TYPE (len);
12197 gcc_assert (TREE_TYPE (start_index) == len_type);
12199 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12200 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12201 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12202 gimple* stmt = gimple_build_assign (len, rhs);
12203 gimple_seq_add_stmt (&stmts, stmt);
12205 return stmts;