Patch ieee128-lib-patch008b
[official-gcc.git] / gcc / tree-vect-stmts.c
blob: 33210e1485b5b08083e3bdc28af3e723c09dc1cd
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 int misalign, enum vect_cost_model_location where)
97 if ((kind == vector_load || kind == unaligned_load)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_gather_load;
100 if ((kind == vector_store || kind == unaligned_store)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_scatter_store;
104 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
105 body_cost_vec->safe_push (si);
107 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
158 tree array_ref;
159 gimple *new_stmt;
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 tree mem_ref;
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
249 worklist->safe_push (stmt_info);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
261 tree op;
262 ssa_op_iter iter;
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (other than the loop exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
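/* Purely illustrative example (not taken from the surrounding code): in

       for (i = 0; i < n; i++)
         {
           a[i] = b[i] + 1;     <-- has a vdef (alters memory): relevant
           sum = sum + b[i];    <-- result used after the loop: live
         }
       ... = sum;

   the store to a[i] is relevant because it changes memory, and the
   update of sum is live because its value is used outside the loop,
   through the loop exit phi.  */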
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop-closed SSA form).  */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
347 *live_p = true;
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
361 return (*live_p || *relevant);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
373 tree operand;
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
381 /* STMT has a data_ref. FORNOW this means that it is of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
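/* Illustrative example (not part of the original comment): for the
   store "a[i_1] = x_2", USE == x_2 matches the copied RHS and the
   function returns true, whereas USE == i_1 only feeds the address
   computation of the ARRAY_REF and the function returns false.  */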
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
413 return false;
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
422 if (operand == use)
423 return true;
425 return false;
430 /* Function process_use.
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return opt_result::success () if everything is as expected, and a failure result otherwise. */
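/* Illustrative example (not part of the original comment): for a store
   "a[i_1] = x_2", the use i_1 only feeds the address computation, so
   case 1 applies and the relevance of i_1's defining stmt is left
   unchanged, while in the simplest case the defining stmt of x_2 is
   marked with RELEVANT and pushed onto the WORKLIST.  */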
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
474 if (!dstmt_vinfo)
475 return opt_result::success ();
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location,
642 "init: stmt relevant? %G", stmt_info->stmt);
644 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
652 use_operand_p use_p;
653 ssa_op_iter iter;
655 stmt_vec_info stmt_vinfo = worklist.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location,
658 "worklist: examine stmt: %G", stmt_vinfo->stmt);
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
662 of STMT. */
663 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
678 case vect_reduction_def:
679 gcc_assert (relevant != vect_unused_in_scope);
680 if (relevant != vect_unused_in_scope
681 && relevant != vect_used_in_scope
682 && relevant != vect_used_by_reduction
683 && relevant != vect_used_only_live)
684 return opt_result::failure_at
685 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
686 break;
688 case vect_nested_cycle:
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_outer_by_reduction
691 && relevant != vect_used_in_outer)
692 return opt_result::failure_at
693 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
694 break;
696 case vect_double_reduction_def:
697 if (relevant != vect_unused_in_scope
698 && relevant != vect_used_by_reduction
699 && relevant != vect_used_only_live)
700 return opt_result::failure_at
701 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
702 break;
704 default:
705 break;
708 if (is_pattern_stmt_p (stmt_vinfo))
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
715 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
716 tree op = gimple_assign_rhs1 (assign);
718 i = 1;
719 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
721 opt_result res
722 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
723 loop_vinfo, relevant, &worklist, false);
724 if (!res)
725 return res;
726 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
727 loop_vinfo, relevant, &worklist, false);
728 if (!res)
729 return res;
730 i = 2;
732 for (; i < gimple_num_ops (assign); i++)
734 op = gimple_op (assign, i);
735 if (TREE_CODE (op) == SSA_NAME)
737 opt_result res
738 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
739 &worklist, false);
740 if (!res)
741 return res;
745 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
747 for (i = 0; i < gimple_call_num_args (call); i++)
749 tree arg = gimple_call_arg (call, i);
750 opt_result res
751 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
752 &worklist, false);
753 if (!res)
754 return res;
758 else
759 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
761 tree op = USE_FROM_PTR (use_p);
762 opt_result res
763 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
764 &worklist, false);
765 if (!res)
766 return res;
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
771 gather_scatter_info gs_info;
772 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
773 gcc_unreachable ();
774 opt_result res
775 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
776 &worklist, true);
777 if (!res)
779 if (fatal)
780 *fatal = false;
781 return res;
784 } /* while worklist */
786 return opt_result::success ();
789 /* Compute the prologue cost for invariant or constant operands. */
791 static unsigned
792 vect_prologue_cost_for_slp_op (vec_info *vinfo,
793 slp_tree node, stmt_vec_info stmt_info,
794 unsigned opno, enum vect_def_type dt,
795 stmt_vector_for_cost *cost_vec)
797 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
798 tree op = gimple_op (stmt, opno);
799 unsigned prologue_cost = 0;
801 /* Without looking at the actual initializer a vector of
802 constants can be implemented as load from the constant pool.
803 When all elements are the same we can use a splat. */
804 tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
805 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
806 unsigned num_vects_to_check;
807 unsigned HOST_WIDE_INT const_nunits;
808 unsigned nelt_limit;
809 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
810 && ! multiple_p (const_nunits, group_size))
812 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
813 nelt_limit = const_nunits;
815 else
817 /* If either the vector has variable length or the vectors
818 are composed of repeated whole groups we only need to
819 cost construction once. All vectors will be the same. */
820 num_vects_to_check = 1;
821 nelt_limit = group_size;
823 tree elt = NULL_TREE;
824 unsigned nelt = 0;
825 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
827 unsigned si = j % group_size;
828 if (nelt == 0)
829 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
830 /* ??? We're just tracking whether all operands of a single
831 vector initializer are the same, ideally we'd check if
832 we emitted the same one already. */
833 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
834 opno))
835 elt = NULL_TREE;
836 nelt++;
837 if (nelt == nelt_limit)
839 /* ??? We need to pass down stmt_info for a vector type
840 even if it points to the wrong stmt. */
841 prologue_cost += record_stmt_cost
842 (cost_vec, 1,
843 dt == vect_external_def
844 ? (elt ? scalar_to_vec : vec_construct)
845 : vector_load,
846 stmt_info, 0, vect_prologue);
847 nelt = 0;
851 return prologue_cost;
854 /* Function vect_model_simple_cost.
856 Models cost for simple operations, i.e. those that only emit ncopies of a
857 single op. Right now, this does not account for multiple insns that could
858 be generated for the single vector op. We will handle that shortly. */
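/* Illustrative example (not part of the original comment): in the
   non-SLP case, a simple vector addition replicated NCOPIES times is
   costed below as NCOPIES * vector_stmt in the loop body, plus one
   scalar_to_vec prologue cost for each constant or external operand
   that first has to be broadcast into a vector.  */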
860 static void
861 vect_model_simple_cost (vec_info *vinfo,
862 stmt_vec_info stmt_info, int ncopies,
863 enum vect_def_type *dt,
864 int ndts,
865 slp_tree node,
866 stmt_vector_for_cost *cost_vec,
867 vect_cost_for_stmt kind = vector_stmt)
869 int inside_cost = 0, prologue_cost = 0;
871 gcc_assert (cost_vec != NULL);
873 /* ??? Somehow we need to fix this at the callers. */
874 if (node)
875 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
877 if (node)
879 /* Scan operands and account for prologue cost of constants/externals.
880 ??? This over-estimates cost for multiple uses and should be
881 re-engineered. */
882 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
883 tree lhs = gimple_get_lhs (stmt);
884 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
886 tree op = gimple_op (stmt, i);
887 enum vect_def_type dt;
888 if (!op || op == lhs)
889 continue;
890 if (vect_is_simple_use (op, vinfo, &dt)
891 && (dt == vect_constant_def || dt == vect_external_def))
892 prologue_cost += vect_prologue_cost_for_slp_op (vinfo, node,
893 stmt_info,
894 i, dt, cost_vec);
897 else
898 /* Cost the "broadcast" of a scalar operand into a vector operand.
899 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
900 cost model. */
901 for (int i = 0; i < ndts; i++)
902 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
903 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
904 stmt_info, 0, vect_prologue);
906 /* Adjust for two-operator SLP nodes. */
907 if (node && SLP_TREE_TWO_OPERATORS (node))
909 ncopies *= 2;
910 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
911 stmt_info, 0, vect_body);
914 /* Pass the inside-of-loop statements to the target-specific cost model. */
915 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
916 stmt_info, 0, vect_body);
918 if (dump_enabled_p ())
919 dump_printf_loc (MSG_NOTE, vect_location,
920 "vect_model_simple_cost: inside_cost = %d, "
921 "prologue_cost = %d .\n", inside_cost, prologue_cost);
925 /* Model cost for type demotion and promotion operations. PWR is
926 normally zero for single-step promotions and demotions. It will be
927 one if two-step promotion/demotion is required, and so on. NCOPIES
928 is the number of vector results (and thus number of instructions)
929 for the narrowest end of the operation chain. Each additional
930 step doubles the number of instructions required. */
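/* Worked example (illustrative): with NCOPIES == 2 and PWR == 1, i.e.
   a two-step promotion, the loop below records 2 vec_promote_demote
   stmts for the first step and 4 for the second, 6 in total, since
   each additional step doubles the number of instructions.  */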
932 static void
933 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
934 enum vect_def_type *dt,
935 unsigned int ncopies, int pwr,
936 stmt_vector_for_cost *cost_vec)
938 int i;
939 int inside_cost = 0, prologue_cost = 0;
941 for (i = 0; i < pwr + 1; i++)
943 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
944 stmt_info, 0, vect_body);
945 ncopies *= 2;
948 /* FORNOW: Assuming maximum 2 args per stmts. */
949 for (i = 0; i < 2; i++)
950 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
951 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
952 stmt_info, 0, vect_prologue);
954 if (dump_enabled_p ())
955 dump_printf_loc (MSG_NOTE, vect_location,
956 "vect_model_promotion_demotion_cost: inside_cost = %d, "
957 "prologue_cost = %d .\n", inside_cost, prologue_cost);
960 /* Returns true if the current function returns DECL. */
962 static bool
963 cfun_returns (tree decl)
965 edge_iterator ei;
966 edge e;
967 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
969 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
970 if (!ret)
971 continue;
972 if (gimple_return_retval (ret) == decl)
973 return true;
974 /* We often end up with an aggregate copy to the result decl,
975 handle that case as well. First skip intermediate clobbers
976 though. */
977 gimple *def = ret;
978 do
980 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
982 while (gimple_clobber_p (def));
983 if (is_a <gassign *> (def)
984 && gimple_assign_lhs (def) == gimple_return_retval (ret)
985 && gimple_assign_rhs1 (def) == decl)
986 return true;
988 return false;
991 /* Function vect_model_store_cost
993 Models cost for stores. In the case of grouped accesses, one access
994 has the overhead of the grouped access attributed to it. */
996 static void
997 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
998 enum vect_def_type dt,
999 vect_memory_access_type memory_access_type,
1000 vec_load_store_type vls_type, slp_tree slp_node,
1001 stmt_vector_for_cost *cost_vec)
1003 unsigned int inside_cost = 0, prologue_cost = 0;
1004 stmt_vec_info first_stmt_info = stmt_info;
1005 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1007 /* ??? Somehow we need to fix this at the callers. */
1008 if (slp_node)
1009 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1011 if (vls_type == VLS_STORE_INVARIANT)
1013 if (slp_node)
1014 prologue_cost += vect_prologue_cost_for_slp_op (vinfo, slp_node,
1015 stmt_info,
1016 1, dt, cost_vec);
1017 else
1018 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1019 stmt_info, 0, vect_prologue);
1022 /* Grouped stores update all elements in the group at once,
1023 so we want the DR for the first statement. */
1024 if (!slp_node && grouped_access_p)
1025 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1027 /* True if we should include any once-per-group costs as well as
1028 the cost of the statement itself. For SLP we only get called
1029 once per group anyhow. */
1030 bool first_stmt_p = (first_stmt_info == stmt_info);
1032 /* We assume that the cost of a single store-lanes instruction is
1033 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1034 access is instead being provided by a permute-and-store operation,
1035 include the cost of the permutes. */
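/* Worked example (illustrative): for GROUP_SIZE == 4 and NCOPIES == 1,
   the permute-and-store scheme below is costed as
   1 * ceil_log2 (4) * 4 == 8 vec_perm stmts in the loop body.  */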
1036 if (first_stmt_p
1037 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1039 /* Uses high and low interleave or shuffle operations for each
1040 needed permute. */
1041 int group_size = DR_GROUP_SIZE (first_stmt_info);
1042 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1043 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1044 stmt_info, 0, vect_body);
1046 if (dump_enabled_p ())
1047 dump_printf_loc (MSG_NOTE, vect_location,
1048 "vect_model_store_cost: strided group_size = %d .\n",
1049 group_size);
1052 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1053 /* Costs of the stores. */
1054 if (memory_access_type == VMAT_ELEMENTWISE
1055 || memory_access_type == VMAT_GATHER_SCATTER)
1057 /* N scalar stores plus extracting the elements. */
1058 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1059 inside_cost += record_stmt_cost (cost_vec,
1060 ncopies * assumed_nunits,
1061 scalar_store, stmt_info, 0, vect_body);
1063 else
1064 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
1066 if (memory_access_type == VMAT_ELEMENTWISE
1067 || memory_access_type == VMAT_STRIDED_SLP)
1069 /* N scalar stores plus extracting the elements. */
1070 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1071 inside_cost += record_stmt_cost (cost_vec,
1072 ncopies * assumed_nunits,
1073 vec_to_scalar, stmt_info, 0, vect_body);
1076 /* When vectorizing a store into the function result, assign
1077 a penalty if the function returns in a multi-register location.
1078 In this case we assume we'll end up with having to spill the
1079 vector result and do piecewise loads as a conservative estimate. */
1080 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1081 if (base
1082 && (TREE_CODE (base) == RESULT_DECL
1083 || (DECL_P (base) && cfun_returns (base)))
1084 && !aggregate_value_p (base, cfun->decl))
1086 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1087 /* ??? Handle PARALLEL in some way. */
1088 if (REG_P (reg))
1090 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1091 /* Assume that a single reg-reg move is possible and cheap,
1092 do not account for vector to gp register move cost. */
1093 if (nregs > 1)
1095 /* Spill. */
1096 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1097 vector_store,
1098 stmt_info, 0, vect_epilogue);
1099 /* Loads. */
1100 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1101 scalar_load,
1102 stmt_info, 0, vect_epilogue);
1107 if (dump_enabled_p ())
1108 dump_printf_loc (MSG_NOTE, vect_location,
1109 "vect_model_store_cost: inside_cost = %d, "
1110 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1114 /* Calculate cost of DR's memory access. */
1115 void
1116 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1117 unsigned int *inside_cost,
1118 stmt_vector_for_cost *body_cost_vec)
1120 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1121 int alignment_support_scheme
1122 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1124 switch (alignment_support_scheme)
1126 case dr_aligned:
1128 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1129 vector_store, stmt_info, 0,
1130 vect_body);
1132 if (dump_enabled_p ())
1133 dump_printf_loc (MSG_NOTE, vect_location,
1134 "vect_model_store_cost: aligned.\n");
1135 break;
1138 case dr_unaligned_supported:
1140 /* Here, we assign an additional cost for the unaligned store. */
1141 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1142 unaligned_store, stmt_info,
1143 DR_MISALIGNMENT (dr_info),
1144 vect_body);
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_store_cost: unaligned supported by "
1148 "hardware.\n");
1149 break;
1152 case dr_unaligned_unsupported:
1154 *inside_cost = VECT_MAX_COST;
1156 if (dump_enabled_p ())
1157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1158 "vect_model_store_cost: unsupported access.\n");
1159 break;
1162 default:
1163 gcc_unreachable ();
1168 /* Function vect_model_load_cost
1170 Models cost for loads. In the case of grouped accesses, one access has
1171 the overhead of the grouped access attributed to it. Since unaligned
1172 accesses are supported for loads, we also account for the costs of the
1173 access scheme chosen. */
1175 static void
1176 vect_model_load_cost (vec_info *vinfo,
1177 stmt_vec_info stmt_info, unsigned ncopies,
1178 vect_memory_access_type memory_access_type,
1179 slp_instance instance,
1180 slp_tree slp_node,
1181 stmt_vector_for_cost *cost_vec)
1183 unsigned int inside_cost = 0, prologue_cost = 0;
1184 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1186 gcc_assert (cost_vec);
1188 /* ??? Somehow we need to fix this at the callers. */
1189 if (slp_node)
1190 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1192 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1194 /* If the load is permuted then the alignment is determined by
1195 the first group element not by the first scalar stmt DR. */
1196 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1197 /* Record the cost for the permutation. */
1198 unsigned n_perms;
1199 unsigned assumed_nunits
1200 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1201 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1202 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1203 slp_vf, instance, true,
1204 &n_perms);
1205 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1206 first_stmt_info, 0, vect_body);
1207 /* And adjust the number of loads performed. This handles
1208 redundancies as well as loads that are later dead. */
1209 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1210 bitmap_clear (perm);
1211 for (unsigned i = 0;
1212 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1213 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1214 ncopies = 0;
1215 bool load_seen = false;
1216 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1218 if (i % assumed_nunits == 0)
1220 if (load_seen)
1221 ncopies++;
1222 load_seen = false;
1224 if (bitmap_bit_p (perm, i))
1225 load_seen = true;
1227 if (load_seen)
1228 ncopies++;
1229 gcc_assert (ncopies
1230 <= (DR_GROUP_SIZE (first_stmt_info)
1231 - DR_GROUP_GAP (first_stmt_info)
1232 + assumed_nunits - 1) / assumed_nunits);
1235 /* Grouped loads read all elements in the group at once,
1236 so we want the DR for the first statement. */
1237 stmt_vec_info first_stmt_info = stmt_info;
1238 if (!slp_node && grouped_access_p)
1239 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1241 /* True if we should include any once-per-group costs as well as
1242 the cost of the statement itself. For SLP we only get called
1243 once per group anyhow. */
1244 bool first_stmt_p = (first_stmt_info == stmt_info);
1246 /* We assume that the cost of a single load-lanes instruction is
1247 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1248 access is instead being provided by a load-and-permute operation,
1249 include the cost of the permutes. */
1250 if (first_stmt_p
1251 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1253 /* Uses even and odd extract operations or shuffle operations
1254 for each needed permute. */
1255 int group_size = DR_GROUP_SIZE (first_stmt_info);
1256 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1257 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1258 stmt_info, 0, vect_body);
1260 if (dump_enabled_p ())
1261 dump_printf_loc (MSG_NOTE, vect_location,
1262 "vect_model_load_cost: strided group_size = %d .\n",
1263 group_size);
1266 /* The loads themselves. */
1267 if (memory_access_type == VMAT_ELEMENTWISE
1268 || memory_access_type == VMAT_GATHER_SCATTER)
1270 /* N scalar loads plus gathering them into a vector. */
1271 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1272 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1273 inside_cost += record_stmt_cost (cost_vec,
1274 ncopies * assumed_nunits,
1275 scalar_load, stmt_info, 0, vect_body);
1277 else
1278 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1279 &inside_cost, &prologue_cost,
1280 cost_vec, cost_vec, true);
1281 if (memory_access_type == VMAT_ELEMENTWISE
1282 || memory_access_type == VMAT_STRIDED_SLP)
1283 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1284 stmt_info, 0, vect_body);
1286 if (dump_enabled_p ())
1287 dump_printf_loc (MSG_NOTE, vect_location,
1288 "vect_model_load_cost: inside_cost = %d, "
1289 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1293 /* Calculate cost of DR's memory access. */
1294 void
1295 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1296 bool add_realign_cost, unsigned int *inside_cost,
1297 unsigned int *prologue_cost,
1298 stmt_vector_for_cost *prologue_cost_vec,
1299 stmt_vector_for_cost *body_cost_vec,
1300 bool record_prologue_costs)
1302 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1303 int alignment_support_scheme
1304 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1306 switch (alignment_support_scheme)
1308 case dr_aligned:
1310 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1311 stmt_info, 0, vect_body);
1313 if (dump_enabled_p ())
1314 dump_printf_loc (MSG_NOTE, vect_location,
1315 "vect_model_load_cost: aligned.\n");
1317 break;
1319 case dr_unaligned_supported:
1321 /* Here, we assign an additional cost for the unaligned load. */
1322 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1323 unaligned_load, stmt_info,
1324 DR_MISALIGNMENT (dr_info),
1325 vect_body);
1327 if (dump_enabled_p ())
1328 dump_printf_loc (MSG_NOTE, vect_location,
1329 "vect_model_load_cost: unaligned supported by "
1330 "hardware.\n");
1332 break;
1334 case dr_explicit_realign:
1336 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1337 vector_load, stmt_info, 0, vect_body);
1338 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1339 vec_perm, stmt_info, 0, vect_body);
1341 /* FIXME: If the misalignment remains fixed across the iterations of
1342 the containing loop, the following cost should be added to the
1343 prologue costs. */
1344 if (targetm.vectorize.builtin_mask_for_load)
1345 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1346 stmt_info, 0, vect_body);
1348 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_NOTE, vect_location,
1350 "vect_model_load_cost: explicit realign\n");
1352 break;
1354 case dr_explicit_realign_optimized:
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_NOTE, vect_location,
1358 "vect_model_load_cost: unaligned software "
1359 "pipelined.\n");
1361 /* Unaligned software pipeline has a load of an address, an initial
1362 load, and possibly a mask operation to "prime" the loop. However,
1363 if this is an access in a group of loads, which provide grouped
1364 access, then the above cost should only be considered for one
1365 access in the group. Inside the loop, there is a load op
1366 and a realignment op. */
1368 if (add_realign_cost && record_prologue_costs)
1370 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1371 vector_stmt, stmt_info,
1372 0, vect_prologue);
1373 if (targetm.vectorize.builtin_mask_for_load)
1374 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1375 vector_stmt, stmt_info,
1376 0, vect_prologue);
1379 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1380 stmt_info, 0, vect_body);
1381 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1382 stmt_info, 0, vect_body);
1384 if (dump_enabled_p ())
1385 dump_printf_loc (MSG_NOTE, vect_location,
1386 "vect_model_load_cost: explicit realign optimized"
1387 "\n");
1389 break;
1392 case dr_unaligned_unsupported:
1394 *inside_cost = VECT_MAX_COST;
1396 if (dump_enabled_p ())
1397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1398 "vect_model_load_cost: unsupported access.\n");
1399 break;
1402 default:
1403 gcc_unreachable ();
1407 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1408 the loop preheader for the vectorized stmt STMT_VINFO. */
1410 static void
1411 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1412 gimple_stmt_iterator *gsi)
1414 if (gsi)
1415 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1416 else
1418 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1420 if (loop_vinfo)
1422 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1423 basic_block new_bb;
1424 edge pe;
1426 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1427 loop = loop->inner;
1429 pe = loop_preheader_edge (loop);
1430 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1431 gcc_assert (!new_bb);
1433 else
1435 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
1436 basic_block bb;
1437 gimple_stmt_iterator gsi_bb_start;
1439 gcc_assert (bb_vinfo);
1440 bb = BB_VINFO_BB (bb_vinfo);
1441 gsi_bb_start = gsi_after_labels (bb);
1442 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1446 if (dump_enabled_p ())
1447 dump_printf_loc (MSG_NOTE, vect_location,
1448 "created new init_stmt: %G", new_stmt);
1451 /* Function vect_init_vector.
1453 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1454 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1455 vector type a vector with all elements equal to VAL is created first.
1456 Place the initialization at GSI if it is not NULL. Otherwise, place the
1457 initialization at the loop preheader.
1458 Return the DEF of INIT_STMT.
1459 It will be used in the vectorization of STMT_INFO. */
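/* Illustrative example (not part of the original comment): calling this
   with VAL == 7 (a scalar integer) and TYPE == a 4-element integer
   vector type emits, at GSI or in the loop preheader, an init stmt
   assigning the constant vector { 7, 7, 7, 7 } to a fresh temporary
   and returns that temporary's SSA name.  */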
1461 tree
1462 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1463 gimple_stmt_iterator *gsi)
1465 gimple *init_stmt;
1466 tree new_temp;
1468 /* We abuse this function to push something to an SSA name with initial value 'val'. */
1469 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1471 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1472 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1474 /* Scalar boolean value should be transformed into
1475 all zeros or all ones value before building a vector. */
1476 if (VECTOR_BOOLEAN_TYPE_P (type))
1478 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1479 tree false_val = build_zero_cst (TREE_TYPE (type));
1481 if (CONSTANT_CLASS_P (val))
1482 val = integer_zerop (val) ? false_val : true_val;
1483 else
1485 new_temp = make_ssa_name (TREE_TYPE (type));
1486 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1487 val, true_val, false_val);
1488 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1489 val = new_temp;
1492 else
1494 gimple_seq stmts = NULL;
1495 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1496 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1497 TREE_TYPE (type), val);
1498 else
1499 /* ??? Condition vectorization expects us to do
1500 promotion of invariant/external defs. */
1501 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1502 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1503 !gsi_end_p (gsi2); )
1505 init_stmt = gsi_stmt (gsi2);
1506 gsi_remove (&gsi2, false);
1507 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1511 val = build_vector_from_val (type, val);
1514 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1515 init_stmt = gimple_build_assign (new_temp, val);
1516 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1517 return new_temp;
1520 /* Function vect_get_vec_def_for_operand_1.
1522 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1523 with type DT that will be used in the vectorized stmt. */
1525 tree
1526 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1527 enum vect_def_type dt)
1529 tree vec_oprnd;
1530 stmt_vec_info vec_stmt_info;
1532 switch (dt)
1534 /* operand is a constant or a loop invariant. */
1535 case vect_constant_def:
1536 case vect_external_def:
1537 /* Code should use vect_get_vec_def_for_operand. */
1538 gcc_unreachable ();
1540 /* Operand is defined by a loop header phi. In case of nested
1541 cycles we also may have uses of the backedge def. */
1542 case vect_reduction_def:
1543 case vect_double_reduction_def:
1544 case vect_nested_cycle:
1545 case vect_induction_def:
1546 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1547 || dt == vect_nested_cycle);
1548 /* Fallthru. */
1550 /* operand is defined inside the loop. */
1551 case vect_internal_def:
1553 /* Get the def from the vectorized stmt. */
1554 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1555 /* Get vectorized pattern statement. */
1556 if (!vec_stmt_info
1557 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1558 && !STMT_VINFO_RELEVANT (def_stmt_info))
1559 vec_stmt_info = (STMT_VINFO_VEC_STMT
1560 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1561 gcc_assert (vec_stmt_info);
1562 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1563 vec_oprnd = PHI_RESULT (phi);
1564 else
1565 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1566 return vec_oprnd;
1569 default:
1570 gcc_unreachable ();
1575 /* Function vect_get_vec_def_for_operand.
1577 OP is an operand in STMT_VINFO. This function returns a (vector) def
1578 that will be used in the vectorized stmt for STMT_VINFO.
1580 In the case that OP is an SSA_NAME which is defined in the loop, then
1581 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1583 In case OP is an invariant or constant, a new stmt that creates a vector def
1584 needs to be introduced. VECTYPE may be used to specify a required type for
1585 vector invariant. */
1587 tree
1588 vect_get_vec_def_for_operand (vec_info *vinfo,
1589 tree op, stmt_vec_info stmt_vinfo, tree vectype)
1591 gimple *def_stmt;
1592 enum vect_def_type dt;
1593 bool is_simple_use;
1594 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1596 if (dump_enabled_p ())
1597 dump_printf_loc (MSG_NOTE, vect_location,
1598 "vect_get_vec_def_for_operand: %T\n", op);
1600 stmt_vec_info def_stmt_info;
1601 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1602 &def_stmt_info, &def_stmt);
1603 gcc_assert (is_simple_use);
1604 if (def_stmt && dump_enabled_p ())
1605 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1607 if (dt == vect_constant_def || dt == vect_external_def)
1609 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1610 tree vector_type;
1612 if (vectype)
1613 vector_type = vectype;
1614 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1615 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1616 vector_type = truth_type_for (stmt_vectype);
1617 else
1618 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1620 gcc_assert (vector_type);
1621 return vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1623 else
1624 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1628 /* Function vect_get_vec_def_for_stmt_copy
1630 Return a vector-def for an operand. This function is used when the
1631 vectorized stmt to be created (by the caller to this function) is a "copy"
1632 created in case the vectorized result cannot fit in one vector, and several
1633 copies of the vector-stmt are required. In this case the vector-def is
1634 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1635 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1637 Context:
1638 In case the vectorization factor (VF) is bigger than the number
1639 of elements that can fit in a vectype (nunits), we have to generate
1640 more than one vector stmt to vectorize the scalar stmt. This situation
1641 arises when there are multiple data-types operated upon in the loop; the
1642 smallest data-type determines the VF, and as a result, when vectorizing
1643 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1644 vector stmt (each computing a vector of 'nunits' results, and together
1645 computing 'VF' results in each iteration). This function is called when
1646 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1647 which VF=16 and nunits=4, so the number of copies required is 4):
1649 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1651 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1652 VS1.1: vx.1 = memref1 VS1.2
1653 VS1.2: vx.2 = memref2 VS1.3
1654 VS1.3: vx.3 = memref3
1656 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1657 VSnew.1: vz1 = vx.1 + ... VSnew.2
1658 VSnew.2: vz2 = vx.2 + ... VSnew.3
1659 VSnew.3: vz3 = vx.3 + ...
1661 The vectorization of S1 is explained in vectorizable_load.
1662 The vectorization of S2:
1663 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1664 the function 'vect_get_vec_def_for_operand' is called to
1665 get the relevant vector-def for each operand of S2. For operand x it
1666 returns the vector-def 'vx.0'.
1668 To create the remaining copies of the vector-stmt (VSnew.j), this
1669 function is called to get the relevant vector-def for each operand. It is
1670 obtained from the respective VS1.j stmt, which is recorded in the
1671 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1673 For example, to obtain the vector-def 'vx.1' in order to create the
1674 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1675 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1676 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1677 and return its def ('vx.1').
1678 Overall, to create the above sequence this function will be called 3 times:
1679 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1680 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1681 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1683 tree
1684 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1686 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1687 if (!def_stmt_info)
1688 /* Do nothing; can reuse same def. */
1689 return vec_oprnd;
1691 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1692 gcc_assert (def_stmt_info);
1693 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1694 vec_oprnd = PHI_RESULT (phi);
1695 else
1696 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1697 return vec_oprnd;
1701 /* Get vectorized definitions for the operands to create a copy of an original
1702 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1704 void
1705 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1706 vec<tree> *vec_oprnds0,
1707 vec<tree> *vec_oprnds1)
1709 tree vec_oprnd = vec_oprnds0->pop ();
1711 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1712 vec_oprnds0->quick_push (vec_oprnd);
1714 if (vec_oprnds1 && vec_oprnds1->length ())
1716 vec_oprnd = vec_oprnds1->pop ();
1717 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1718 vec_oprnds1->quick_push (vec_oprnd);
1723 /* Get vectorized definitions for OP0 and OP1. */
1725 void
1726 vect_get_vec_defs (vec_info *vinfo, tree op0, tree op1, stmt_vec_info stmt_info,
1727 vec<tree> *vec_oprnds0,
1728 vec<tree> *vec_oprnds1,
1729 slp_tree slp_node)
1731 if (slp_node)
1733 auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
1734 vect_get_slp_defs (vinfo, slp_node, &vec_defs, op1 ? 2 : 1);
1735 *vec_oprnds0 = vec_defs[0];
1736 if (op1)
1737 *vec_oprnds1 = vec_defs[1];
1739 else
1741 tree vec_oprnd;
1743 vec_oprnds0->create (1);
1744 vec_oprnd = vect_get_vec_def_for_operand (vinfo, op0, stmt_info);
1745 vec_oprnds0->quick_push (vec_oprnd);
1747 if (op1)
1749 vec_oprnds1->create (1);
1750 vec_oprnd = vect_get_vec_def_for_operand (vinfo, op1, stmt_info);
1751 vec_oprnds1->quick_push (vec_oprnd);
1756 /* Helper function called by vect_finish_replace_stmt and
1757 vect_finish_stmt_generation. Set the location of the new
1758 statement and create and return a stmt_vec_info for it. */
1760 static stmt_vec_info
1761 vect_finish_stmt_generation_1 (vec_info *vinfo,
1762 stmt_vec_info stmt_info, gimple *vec_stmt)
1764 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1766 if (dump_enabled_p ())
1767 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1769 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1771 /* While EH edges will generally prevent vectorization, stmt might
1772 e.g. be in a must-not-throw region. Ensure newly created stmts
1773 that could throw are part of the same region. */
1774 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1775 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1776 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1778 return vec_stmt_info;
1781 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1782 which sets the same scalar result as STMT_INFO did. Create and return a
1783 stmt_vec_info for VEC_STMT. */
1785 stmt_vec_info
1786 vect_finish_replace_stmt (vec_info *vinfo,
1787 stmt_vec_info stmt_info, gimple *vec_stmt)
1789 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1790 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1792 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1793 gsi_replace (&gsi, vec_stmt, true);
1795 return vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1798 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1799 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1801 stmt_vec_info
1802 vect_finish_stmt_generation (vec_info *vinfo,
1803 stmt_vec_info stmt_info, gimple *vec_stmt,
1804 gimple_stmt_iterator *gsi)
1806 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1808 if (!gsi_end_p (*gsi)
1809 && gimple_has_mem_ops (vec_stmt))
1811 gimple *at_stmt = gsi_stmt (*gsi);
1812 tree vuse = gimple_vuse (at_stmt);
1813 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1815 tree vdef = gimple_vdef (at_stmt);
1816 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1817 /* If we have an SSA vuse and insert a store, update virtual
1818 SSA form to avoid triggering the renamer. Do so only
1819 if we can easily see all uses - which is what almost always
1820 happens with the way vectorized stmts are inserted. */
1821 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1822 && ((is_gimple_assign (vec_stmt)
1823 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1824 || (is_gimple_call (vec_stmt)
1825 && !(gimple_call_flags (vec_stmt)
1826 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1828 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1829 gimple_set_vdef (vec_stmt, new_vdef);
1830 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1834 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1835 return vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1838 /* We want to vectorize a call to combined function CFN with function
1839 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1840 as the types of all inputs. Check whether this is possible using
1841 an internal function, returning its code if so or IFN_LAST if not. */
1843 static internal_fn
1844 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1845 tree vectype_out, tree vectype_in)
1847 internal_fn ifn;
1848 if (internal_fn_p (cfn))
1849 ifn = as_internal_fn (cfn);
1850 else
1851 ifn = associated_internal_fn (fndecl);
1852 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1854 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1855 if (info.vectorizable)
1857 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1858 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1859 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1860 OPTIMIZE_FOR_SPEED))
1861 return ifn;
1864 return IFN_LAST;
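/* For example (an illustrative sketch, not taken from a particular caller):
   a call recognized as CFN_FMA maps to IFN_FMA, and the check above reduces
   to asking whether the target provides the corresponding optab for the
   chosen vector mode:

     internal_fn ifn
       = vectorizable_internal_function (CFN_FMA, NULL_TREE, vectype, vectype);
     if (ifn != IFN_LAST)
       ... the call can be vectorized as a single internal function call ...  */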
1868 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1869 gimple_stmt_iterator *);
1871 /* Check whether a load or store statement in the loop described by
1872 LOOP_VINFO is possible in a fully-masked loop. This is testing
1873 whether the vectorizer pass has the appropriate support, as well as
1874 whether the target does.
1876 VLS_TYPE says whether the statement is a load or store and VECTYPE
1877 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1878 says how the load or store is going to be implemented and GROUP_SIZE
1879 is the number of load or store statements in the containing group.
1880 If the access is a gather load or scatter store, GS_INFO describes
1881 its arguments. If the load or store is conditional, SCALAR_MASK is the
1882 condition under which it occurs.
1884 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1885 supported, otherwise record the required mask types. */
1887 static void
1888 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1889 vec_load_store_type vls_type, int group_size,
1890 vect_memory_access_type memory_access_type,
1891 gather_scatter_info *gs_info, tree scalar_mask)
1893 /* Invariant loads need no special support. */
1894 if (memory_access_type == VMAT_INVARIANT)
1895 return;
1897 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1898 machine_mode vecmode = TYPE_MODE (vectype);
1899 bool is_load = (vls_type == VLS_LOAD);
1900 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1902 if (is_load
1903 ? !vect_load_lanes_supported (vectype, group_size, true)
1904 : !vect_store_lanes_supported (vectype, group_size, true))
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1908 "can't use a fully-masked loop because the"
1909 " target doesn't have an appropriate masked"
1910 " load/store-lanes instruction.\n");
1911 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1912 return;
1914 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1915 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1916 return;
1919 if (memory_access_type == VMAT_GATHER_SCATTER)
1921 internal_fn ifn = (is_load
1922 ? IFN_MASK_GATHER_LOAD
1923 : IFN_MASK_SCATTER_STORE);
1924 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1925 gs_info->memory_type,
1926 gs_info->offset_vectype,
1927 gs_info->scale))
1929 if (dump_enabled_p ())
1930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1931 "can't use a fully-masked loop because the"
1932 " target doesn't have an appropriate masked"
1933 " gather load or scatter store instruction.\n");
1934 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1935 return;
1937 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1938 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1939 return;
1942 if (memory_access_type != VMAT_CONTIGUOUS
1943 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1945 /* Element X of the data must come from iteration i * VF + X of the
1946 scalar loop. We need more work to support other mappings. */
1947 if (dump_enabled_p ())
1948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1949 "can't use a fully-masked loop because an access"
1950 " isn't contiguous.\n");
1951 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1952 return;
1955 machine_mode mask_mode;
1956 if (!VECTOR_MODE_P (vecmode)
1957 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1958 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1960 if (dump_enabled_p ())
1961 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1962 "can't use a fully-masked loop because the target"
1963 " doesn't have the appropriate masked load or"
1964 " store.\n");
1965 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1966 return;
1968 /* We might load more scalars than we need for permuting SLP loads.
1969 We checked in get_group_load_store_type that the extra elements
1970 don't leak into a new vector. */
1971 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1972 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1973 unsigned int nvectors;
1974 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1975 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1976 else
1977 gcc_unreachable ();
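/* For example (with made-up numbers): a contiguous group of 2 stores with
   VF = 8 and 4-element vectors gives NVECTORS = 2 * 8 / 4 = 4, so four loop
   masks of VECTYPE are recorded for the fully-masked loop.  */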
1980 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1981 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1982 that needs to be applied to all loads and stores in a vectorized loop.
1983 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1985 MASK_TYPE is the type of both masks. If new statements are needed,
1986 insert them before GSI. */
1988 static tree
1989 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1990 gimple_stmt_iterator *gsi)
1992 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1993 if (!loop_mask)
1994 return vec_mask;
1996 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1997 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1998 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1999 vec_mask, loop_mask);
2000 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
2001 return and_res;
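/* For illustration, with a non-null LOOP_MASK this emits a single statement
   of the form (SSA names made up):

     vec_mask_and_12 = vec_mask_8 & loop_mask_10;

   and returns vec_mask_and_12 for use as the mask operand of the masked
   load or store.  */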
2004 /* Determine whether we can use a gather load or scatter store to vectorize
2005 strided load or store STMT_INFO by truncating the current offset to a
2006 smaller width. We need to be able to construct an offset vector:
2008 { 0, X, X*2, X*3, ... }
2010 without loss of precision, where X is STMT_INFO's DR_STEP.
2012 Return true if this is possible, describing the gather load or scatter
2013 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
2015 static bool
2016 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2017 loop_vec_info loop_vinfo, bool masked_p,
2018 gather_scatter_info *gs_info)
2020 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2021 data_reference *dr = dr_info->dr;
2022 tree step = DR_STEP (dr);
2023 if (TREE_CODE (step) != INTEGER_CST)
2025 /* ??? Perhaps we could use range information here? */
2026 if (dump_enabled_p ())
2027 dump_printf_loc (MSG_NOTE, vect_location,
2028 "cannot truncate variable step.\n");
2029 return false;
2032 /* Get the number of bits in an element. */
2033 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2034 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2035 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2037 /* Set COUNT to the upper limit on the number of elements - 1.
2038 Start with the maximum vectorization factor. */
2039 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2041 /* Try lowering COUNT to the number of scalar latch iterations. */
2042 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2043 widest_int max_iters;
2044 if (max_loop_iterations (loop, &max_iters)
2045 && max_iters < count)
2046 count = max_iters.to_shwi ();
2048 /* Try scales of 1 and the element size. */
2049 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2050 wi::overflow_type overflow = wi::OVF_NONE;
2051 for (int i = 0; i < 2; ++i)
2053 int scale = scales[i];
2054 widest_int factor;
2055 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2056 continue;
2058 /* Determine the minimum precision of COUNT * STEP / SCALE. */
2059 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2060 if (overflow)
2061 continue;
2062 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2063 unsigned int min_offset_bits = wi::min_precision (range, sign);
2065 /* Find the narrowest viable offset type. */
2066 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
2067 tree offset_type = build_nonstandard_integer_type (offset_bits,
2068 sign == UNSIGNED);
2070 /* See whether the target supports the operation with an offset
2071 no narrower than OFFSET_TYPE. */
2072 tree memory_type = TREE_TYPE (DR_REF (dr));
2073 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
2074 vectype, memory_type, offset_type, scale,
2075 &gs_info->ifn, &gs_info->offset_vectype))
2076 continue;
2078 gs_info->decl = NULL_TREE;
2079 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2080 but we don't need to store that here. */
2081 gs_info->base = NULL_TREE;
2082 gs_info->element_type = TREE_TYPE (vectype);
2083 gs_info->offset = fold_convert (offset_type, step);
2084 gs_info->offset_dt = vect_constant_def;
2085 gs_info->scale = scale;
2086 gs_info->memory_type = memory_type;
2087 return true;
2090 if (overflow && dump_enabled_p ())
2091 dump_printf_loc (MSG_NOTE, vect_location,
2092 "truncating gather/scatter offset to %d bits"
2093 " might change its value.\n", element_bits);
2095 return false;
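/* Worked example (illustrative numbers): with DR_STEP = 4, 4-byte elements
   and COUNT = 255, the candidate scale 1 gives FACTOR = 4, RANGE = 1020 and
   a 16-bit offset type, while the element-size scale 4 gives FACTOR = 1,
   RANGE = 255 and an 8-bit offset type; the first of the two that the
   target supports as a gather/scatter offset (scale 1 is tried first) is
   the one used.  */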
2098 /* Return true if we can use gather/scatter internal functions to
2099 vectorize STMT_INFO, which is a grouped or strided load or store.
2100 MASKED_P is true if load or store is conditional. When returning
2101 true, fill in GS_INFO with the information required to perform the
2102 operation. */
2104 static bool
2105 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2106 loop_vec_info loop_vinfo, bool masked_p,
2107 gather_scatter_info *gs_info)
2109 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2110 || gs_info->decl)
2111 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2112 masked_p, gs_info);
2114 tree old_offset_type = TREE_TYPE (gs_info->offset);
2115 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
2117 gcc_assert (TYPE_PRECISION (new_offset_type)
2118 >= TYPE_PRECISION (old_offset_type));
2119 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
2121 if (dump_enabled_p ())
2122 dump_printf_loc (MSG_NOTE, vect_location,
2123 "using gather/scatter for strided/grouped access,"
2124 " scale = %d\n", gs_info->scale);
2126 return true;
2129 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2130 elements with a known constant step. Return -1 if that step
2131 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2133 static int
2134 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2136 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2137 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2138 size_zero_node);
2141 /* If the target supports a permute mask that reverses the elements in
2142 a vector of type VECTYPE, return that mask, otherwise return null. */
2144 static tree
2145 perm_mask_for_reverse (tree vectype)
2147 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2149 /* The encoding has a single stepped pattern. */
2150 vec_perm_builder sel (nunits, 1, 3);
2151 for (int i = 0; i < 3; ++i)
2152 sel.quick_push (nunits - 1 - i);
2154 vec_perm_indices indices (sel, 1, nunits);
2155 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2156 return NULL_TREE;
2157 return vect_gen_perm_mask_checked (vectype, indices);
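/* For example, for a 4-element vector the encoding above expands to the
   selector { 3, 2, 1, 0 }; for variable-length vectors the single stepped
   pattern { nunits - 1, nunits - 2, nunits - 3, ... } describes the whole
   mask.  */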
2160 /* A subroutine of get_load_store_type, with a subset of the same
2161 arguments. Handle the case where STMT_INFO is a load or store that
2162 accesses consecutive elements with a negative step. */
2164 static vect_memory_access_type
2165 get_negative_load_store_type (vec_info *vinfo,
2166 stmt_vec_info stmt_info, tree vectype,
2167 vec_load_store_type vls_type,
2168 unsigned int ncopies)
2170 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2171 dr_alignment_support alignment_support_scheme;
2173 if (ncopies > 1)
2175 if (dump_enabled_p ())
2176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2177 "multiple types with negative step.\n");
2178 return VMAT_ELEMENTWISE;
2181 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
2182 dr_info, false);
2183 if (alignment_support_scheme != dr_aligned
2184 && alignment_support_scheme != dr_unaligned_supported)
2186 if (dump_enabled_p ())
2187 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2188 "negative step but alignment required.\n");
2189 return VMAT_ELEMENTWISE;
2192 if (vls_type == VLS_STORE_INVARIANT)
2194 if (dump_enabled_p ())
2195 dump_printf_loc (MSG_NOTE, vect_location,
2196 "negative step with invariant source;"
2197 " no permute needed.\n");
2198 return VMAT_CONTIGUOUS_DOWN;
2201 if (!perm_mask_for_reverse (vectype))
2203 if (dump_enabled_p ())
2204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2205 "negative step and reversing not supported.\n");
2206 return VMAT_ELEMENTWISE;
2209 return VMAT_CONTIGUOUS_REVERSE;
2212 /* STMT_INFO is either a masked or unconditional store. Return the value
2213 being stored. */
2215 tree
2216 vect_get_store_rhs (stmt_vec_info stmt_info)
2218 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2220 gcc_assert (gimple_assign_single_p (assign));
2221 return gimple_assign_rhs1 (assign);
2223 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2225 internal_fn ifn = gimple_call_internal_fn (call);
2226 int index = internal_fn_stored_value_index (ifn);
2227 gcc_assert (index >= 0);
2228 return gimple_call_arg (call, index);
2230 gcc_unreachable ();
2233 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2235 This function returns a vector type which can be composed from NELTS pieces,
2236 whose type is recorded in PTYPE. VTYPE should be a vector type with the
2237 same vector size as the returned vector. It first checks whether the target
2238 supports a piece-sized vector mode for the construction; if not, it then
2239 checks whether a piece-sized scalar mode can be used instead. It returns
2240 NULL_TREE if no usable composition can be found.
2242 For example, for (vtype=V16QI, nelts=4), we can probably get:
2243 - V16QI with PTYPE V4QI.
2244 - V4SI with PTYPE SI.
2245 - NULL_TREE. */
2247 static tree
2248 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2250 gcc_assert (VECTOR_TYPE_P (vtype));
2251 gcc_assert (known_gt (nelts, 0U));
2253 machine_mode vmode = TYPE_MODE (vtype);
2254 if (!VECTOR_MODE_P (vmode))
2255 return NULL_TREE;
2257 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2258 unsigned int pbsize;
2259 if (constant_multiple_p (vbsize, nelts, &pbsize))
2261 /* First check if vec_init optab supports construction from
2262 vector pieces directly. */
2263 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2264 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2265 machine_mode rmode;
2266 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2267 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2268 != CODE_FOR_nothing))
2270 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2271 return vtype;
2274 /* Otherwise check whether an integer type of the same piece size exists
2275 and whether the vec_init optab supports construction from it directly. */
2276 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2277 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2278 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2279 != CODE_FOR_nothing))
2281 *ptype = build_nonstandard_integer_type (pbsize, 1);
2282 return build_vector_type (*ptype, nelts);
2286 return NULL_TREE;
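/* An illustrative sketch of how get_group_load_store_type below uses this
   to check whether a vector can be assembled from two halves:

     tree half_vtype;
     if (vector_vector_composition_type (vectype, 2, &half_vtype) != NULL_TREE)
       ... load only the first half and construct the vector from it ...  */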
2289 /* A subroutine of get_load_store_type, with a subset of the same
2290 arguments. Handle the case where STMT_INFO is part of a grouped load
2291 or store.
2293 For stores, the statements in the group are all consecutive
2294 and there is no gap at the end. For loads, the statements in the
2295 group might not be consecutive; there can be gaps between statements
2296 as well as at the end. */
2298 static bool
2299 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2300 tree vectype, bool slp,
2301 bool masked_p, vec_load_store_type vls_type,
2302 vect_memory_access_type *memory_access_type,
2303 gather_scatter_info *gs_info)
2305 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2306 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2307 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2308 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2309 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2310 bool single_element_p = (stmt_info == first_stmt_info
2311 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2312 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2313 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2315 /* True if the vectorized statements would access beyond the last
2316 statement in the group. */
2317 bool overrun_p = false;
2319 /* True if we can cope with such overrun by peeling for gaps, so that
2320 there is at least one final scalar iteration after the vector loop. */
2321 bool can_overrun_p = (!masked_p
2322 && vls_type == VLS_LOAD
2323 && loop_vinfo
2324 && !loop->inner);
2326 /* There can only be a gap at the end of the group if the stride is
2327 known at compile time. */
2328 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2330 /* Stores can't yet have gaps. */
2331 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2333 if (slp)
2335 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2337 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2338 separated by the stride, until we have a complete vector.
2339 Fall back to scalar accesses if that isn't possible. */
2340 if (multiple_p (nunits, group_size))
2341 *memory_access_type = VMAT_STRIDED_SLP;
2342 else
2343 *memory_access_type = VMAT_ELEMENTWISE;
2345 else
2347 overrun_p = loop_vinfo && gap != 0;
2348 if (overrun_p && vls_type != VLS_LOAD)
2350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2351 "Grouped store with gaps requires"
2352 " non-consecutive accesses\n");
2353 return false;
2355 /* An overrun is fine if the trailing elements are smaller
2356 than the alignment boundary B. Every vector access will
2357 be a multiple of B and so we are guaranteed to access a
2358 non-gap element in the same B-sized block. */
2359 if (overrun_p
2360 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2361 / vect_get_scalar_dr_size (first_dr_info)))
2362 overrun_p = false;
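/* For example (made-up numbers): with 4-byte elements and a known 16-byte
   alignment the ratio above is 4, so a trailing gap of up to 3 elements
   cannot push an access into a 16-byte block that holds no real group
   element, and the overrun is harmless.  */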
2364 /* If the gap splits the vector in half and the target
2365 can do half-vector operations, avoid the epilogue peeling
2366 by simply loading half of the vector only. Usually
2367 the construction with an upper zero half will be elided. */
2368 dr_alignment_support alignment_support_scheme;
2369 tree half_vtype;
2370 if (overrun_p
2371 && !masked_p
2372 && (((alignment_support_scheme
2373 = vect_supportable_dr_alignment (vinfo,
2374 first_dr_info, false)))
2375 == dr_aligned
2376 || alignment_support_scheme == dr_unaligned_supported)
2377 && known_eq (nunits, (group_size - gap) * 2)
2378 && known_eq (nunits, group_size)
2379 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2380 != NULL_TREE))
2381 overrun_p = false;
2383 if (overrun_p && !can_overrun_p)
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "Peeling for outer loop is not supported\n");
2388 return false;
2390 int cmp = compare_step_with_zero (vinfo, stmt_info);
2391 if (cmp < 0)
2392 *memory_access_type = get_negative_load_store_type
2393 (vinfo, stmt_info, vectype, vls_type, 1);
2394 else
2396 gcc_assert (!loop_vinfo || cmp > 0);
2397 *memory_access_type = VMAT_CONTIGUOUS;
2401 else
2403 /* We can always handle this case using elementwise accesses,
2404 but see if something more efficient is available. */
2405 *memory_access_type = VMAT_ELEMENTWISE;
2407 /* If there is a gap at the end of the group then these optimizations
2408 would access excess elements in the last iteration. */
2409 bool would_overrun_p = (gap != 0);
2410 /* An overrun is fine if the trailing elements are smaller than the
2411 alignment boundary B. Every vector access will be a multiple of B
2412 and so we are guaranteed to access a non-gap element in the
2413 same B-sized block. */
2414 if (would_overrun_p
2415 && !masked_p
2416 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2417 / vect_get_scalar_dr_size (first_dr_info)))
2418 would_overrun_p = false;
2420 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2421 && (can_overrun_p || !would_overrun_p)
2422 && compare_step_with_zero (vinfo, stmt_info) > 0)
2424 /* First cope with the degenerate case of a single-element
2425 vector. */
2426 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2427 *memory_access_type = VMAT_CONTIGUOUS;
2429 /* Otherwise try using LOAD/STORE_LANES. */
2430 if (*memory_access_type == VMAT_ELEMENTWISE
2431 && (vls_type == VLS_LOAD
2432 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2433 : vect_store_lanes_supported (vectype, group_size,
2434 masked_p)))
2436 *memory_access_type = VMAT_LOAD_STORE_LANES;
2437 overrun_p = would_overrun_p;
2440 /* If that fails, try using permuting loads. */
2441 if (*memory_access_type == VMAT_ELEMENTWISE
2442 && (vls_type == VLS_LOAD
2443 ? vect_grouped_load_supported (vectype, single_element_p,
2444 group_size)
2445 : vect_grouped_store_supported (vectype, group_size)))
2447 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2448 overrun_p = would_overrun_p;
2452 /* As a last resort, try using a gather load or scatter store.
2454 ??? Although the code can handle all group sizes correctly,
2455 it probably isn't a win to use separate strided accesses based
2456 on nearby locations. Or, even if it's a win over scalar code,
2457 it might not be a win over vectorizing at a lower VF, if that
2458 allows us to use contiguous accesses. */
2459 if (*memory_access_type == VMAT_ELEMENTWISE
2460 && single_element_p
2461 && loop_vinfo
2462 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2463 masked_p, gs_info))
2464 *memory_access_type = VMAT_GATHER_SCATTER;
2467 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2469 /* STMT is the leader of the group. Check the operands of all the
2470 stmts of the group. */
2471 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2472 while (next_stmt_info)
2474 tree op = vect_get_store_rhs (next_stmt_info);
2475 enum vect_def_type dt;
2476 if (!vect_is_simple_use (op, vinfo, &dt))
2478 if (dump_enabled_p ())
2479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2480 "use not simple.\n");
2481 return false;
2483 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2487 if (overrun_p)
2489 gcc_assert (can_overrun_p);
2490 if (dump_enabled_p ())
2491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2492 "Data access with gaps requires scalar "
2493 "epilogue loop\n");
2494 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2497 return true;
2500 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2501 if there is a memory access type that the vectorized form can use,
2502 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2503 or scatters, fill in GS_INFO accordingly.
2505 SLP says whether we're performing SLP rather than loop vectorization.
2506 MASKED_P is true if the statement is conditional on a vectorized mask.
2507 VECTYPE is the vector type that the vectorized statements will use.
2508 NCOPIES is the number of vector statements that will be needed. */
2510 static bool
2511 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2512 tree vectype, bool slp,
2513 bool masked_p, vec_load_store_type vls_type,
2514 unsigned int ncopies,
2515 vect_memory_access_type *memory_access_type,
2516 gather_scatter_info *gs_info)
2518 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2519 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2520 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2522 *memory_access_type = VMAT_GATHER_SCATTER;
2523 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2524 gcc_unreachable ();
2525 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2526 &gs_info->offset_dt,
2527 &gs_info->offset_vectype))
2529 if (dump_enabled_p ())
2530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2531 "%s index use not simple.\n",
2532 vls_type == VLS_LOAD ? "gather" : "scatter");
2533 return false;
2536 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2538 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp, masked_p,
2539 vls_type, memory_access_type, gs_info))
2540 return false;
2542 else if (STMT_VINFO_STRIDED_P (stmt_info))
2544 gcc_assert (!slp);
2545 if (loop_vinfo
2546 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2547 masked_p, gs_info))
2548 *memory_access_type = VMAT_GATHER_SCATTER;
2549 else
2550 *memory_access_type = VMAT_ELEMENTWISE;
2552 else
2554 int cmp = compare_step_with_zero (vinfo, stmt_info);
2555 if (cmp < 0)
2556 *memory_access_type = get_negative_load_store_type
2557 (vinfo, stmt_info, vectype, vls_type, ncopies);
2558 else if (cmp == 0)
2560 gcc_assert (vls_type == VLS_LOAD);
2561 *memory_access_type = VMAT_INVARIANT;
2563 else
2564 *memory_access_type = VMAT_CONTIGUOUS;
2567 if ((*memory_access_type == VMAT_ELEMENTWISE
2568 || *memory_access_type == VMAT_STRIDED_SLP)
2569 && !nunits.is_constant ())
2571 if (dump_enabled_p ())
2572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2573 "Not using elementwise accesses due to variable "
2574 "vectorization factor.\n");
2575 return false;
2578 /* FIXME: At the moment the cost model seems to underestimate the
2579 cost of using elementwise accesses. This check preserves the
2580 traditional behavior until that can be fixed. */
2581 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2582 if (!first_stmt_info)
2583 first_stmt_info = stmt_info;
2584 if (*memory_access_type == VMAT_ELEMENTWISE
2585 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2586 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2587 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2588 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2590 if (dump_enabled_p ())
2591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2592 "not falling back to elementwise accesses\n");
2593 return false;
2595 return true;
2598 /* Return true if boolean argument MASK is suitable for vectorizing
2599 conditional operation STMT_INFO. When returning true, store the type
2600 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2601 in *MASK_VECTYPE_OUT. */
2603 static bool
2604 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2605 vect_def_type *mask_dt_out,
2606 tree *mask_vectype_out)
2608 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2610 if (dump_enabled_p ())
2611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2612 "mask argument is not a boolean.\n");
2613 return false;
2616 if (TREE_CODE (mask) != SSA_NAME)
2618 if (dump_enabled_p ())
2619 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2620 "mask argument is not an SSA name.\n");
2621 return false;
2624 enum vect_def_type mask_dt;
2625 tree mask_vectype;
2626 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2628 if (dump_enabled_p ())
2629 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2630 "mask use not simple.\n");
2631 return false;
2634 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2635 if (!mask_vectype)
2636 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2638 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2640 if (dump_enabled_p ())
2641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2642 "could not find an appropriate vector mask type.\n");
2643 return false;
2646 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2647 TYPE_VECTOR_SUBPARTS (vectype)))
2649 if (dump_enabled_p ())
2650 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2651 "vector mask type %T"
2652 " does not match vector data type %T.\n",
2653 mask_vectype, vectype);
2655 return false;
2658 *mask_dt_out = mask_dt;
2659 *mask_vectype_out = mask_vectype;
2660 return true;
2663 /* Return true if stored value RHS is suitable for vectorizing store
2664 statement STMT_INFO. When returning true, store the type of the
2665 definition in *RHS_DT_OUT, the type of the vectorized store value in
2666 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2668 static bool
2669 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, tree rhs,
2670 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2671 vec_load_store_type *vls_type_out)
2673 /* If this is a store of a constant, make sure native_encode_expr
2674 can handle it. */
2675 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2677 if (dump_enabled_p ())
2678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2679 "cannot encode constant as a byte sequence.\n");
2680 return false;
2683 enum vect_def_type rhs_dt;
2684 tree rhs_vectype;
2685 if (!vect_is_simple_use (rhs, vinfo, &rhs_dt, &rhs_vectype))
2687 if (dump_enabled_p ())
2688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2689 "use not simple.\n");
2690 return false;
2693 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2694 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2696 if (dump_enabled_p ())
2697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2698 "incompatible vector types.\n");
2699 return false;
2702 *rhs_dt_out = rhs_dt;
2703 *rhs_vectype_out = rhs_vectype;
2704 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2705 *vls_type_out = VLS_STORE_INVARIANT;
2706 else
2707 *vls_type_out = VLS_STORE;
2708 return true;
2711 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2712 Note that we support masks with floating-point type, in which case the
2713 floats are interpreted as a bitmask. */
2715 static tree
2716 vect_build_all_ones_mask (vec_info *vinfo,
2717 stmt_vec_info stmt_info, tree masktype)
2719 if (TREE_CODE (masktype) == INTEGER_TYPE)
2720 return build_int_cst (masktype, -1);
2721 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2723 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2724 mask = build_vector_from_val (masktype, mask);
2725 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2727 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2729 REAL_VALUE_TYPE r;
2730 long tmp[6];
2731 for (int j = 0; j < 6; ++j)
2732 tmp[j] = -1;
2733 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2734 tree mask = build_real (TREE_TYPE (masktype), r);
2735 mask = build_vector_from_val (masktype, mask);
2736 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2738 gcc_unreachable ();
2741 /* Build an all-zero merge value of type VECTYPE while vectorizing
2742 STMT_INFO as a gather load. */
2744 static tree
2745 vect_build_zero_merge_argument (vec_info *vinfo,
2746 stmt_vec_info stmt_info, tree vectype)
2748 tree merge;
2749 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2750 merge = build_int_cst (TREE_TYPE (vectype), 0);
2751 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2753 REAL_VALUE_TYPE r;
2754 long tmp[6];
2755 for (int j = 0; j < 6; ++j)
2756 tmp[j] = 0;
2757 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2758 merge = build_real (TREE_TYPE (vectype), r);
2760 else
2761 gcc_unreachable ();
2762 merge = build_vector_from_val (vectype, merge);
2763 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2766 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2767 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2768 the gather load operation. If the load is conditional, MASK is the
2769 unvectorized condition and MASK_DT is its definition type, otherwise
2770 MASK is null. */
2772 static void
2773 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2774 gimple_stmt_iterator *gsi,
2775 stmt_vec_info *vec_stmt,
2776 gather_scatter_info *gs_info,
2777 tree mask)
2779 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2780 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2781 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2782 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2783 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2784 edge pe = loop_preheader_edge (loop);
2785 enum { NARROW, NONE, WIDEN } modifier;
2786 poly_uint64 gather_off_nunits
2787 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2789 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2790 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2791 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2792 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2793 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2794 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2795 tree scaletype = TREE_VALUE (arglist);
2796 tree real_masktype = masktype;
2797 gcc_checking_assert (types_compatible_p (srctype, rettype)
2798 && (!mask
2799 || TREE_CODE (masktype) == INTEGER_TYPE
2800 || types_compatible_p (srctype, masktype)));
2801 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2802 masktype = truth_type_for (srctype);
2804 tree mask_halftype = masktype;
2805 tree perm_mask = NULL_TREE;
2806 tree mask_perm_mask = NULL_TREE;
2807 if (known_eq (nunits, gather_off_nunits))
2808 modifier = NONE;
2809 else if (known_eq (nunits * 2, gather_off_nunits))
2811 modifier = WIDEN;
2813 /* Currently widening gathers and scatters are only supported for
2814 fixed-length vectors. */
2815 int count = gather_off_nunits.to_constant ();
2816 vec_perm_builder sel (count, count, 1);
2817 for (int i = 0; i < count; ++i)
2818 sel.quick_push (i | (count / 2));
2820 vec_perm_indices indices (sel, 1, count);
2821 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2822 indices);
2824 else if (known_eq (nunits, gather_off_nunits * 2))
2826 modifier = NARROW;
2828 /* Currently narrowing gathers and scatters are only supported for
2829 fixed-length vectors. */
2830 int count = nunits.to_constant ();
2831 vec_perm_builder sel (count, count, 1);
2832 sel.quick_grow (count);
2833 for (int i = 0; i < count; ++i)
2834 sel[i] = i < count / 2 ? i : i + count / 2;
2835 vec_perm_indices indices (sel, 2, count);
2836 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2838 ncopies *= 2;
2840 if (mask && masktype == real_masktype)
2842 for (int i = 0; i < count; ++i)
2843 sel[i] = i | (count / 2);
2844 indices.new_vector (sel, 2, count);
2845 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2847 else if (mask)
2848 mask_halftype = truth_type_for (gs_info->offset_vectype);
2850 else
2851 gcc_unreachable ();
2853 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2854 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2856 tree ptr = fold_convert (ptrtype, gs_info->base);
2857 if (!is_gimple_min_invariant (ptr))
2859 gimple_seq seq;
2860 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2861 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2862 gcc_assert (!new_bb);
2865 tree scale = build_int_cst (scaletype, gs_info->scale);
2867 tree vec_oprnd0 = NULL_TREE;
2868 tree vec_mask = NULL_TREE;
2869 tree src_op = NULL_TREE;
2870 tree mask_op = NULL_TREE;
2871 tree prev_res = NULL_TREE;
2872 stmt_vec_info prev_stmt_info = NULL;
2874 if (!mask)
2876 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2877 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2880 for (int j = 0; j < ncopies; ++j)
2882 tree op, var;
2883 if (modifier == WIDEN && (j & 1))
2884 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2885 perm_mask, stmt_info, gsi);
2886 else if (j == 0)
2887 op = vec_oprnd0
2888 = vect_get_vec_def_for_operand (vinfo, gs_info->offset, stmt_info);
2889 else
2890 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2891 vec_oprnd0);
2893 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2895 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2896 TYPE_VECTOR_SUBPARTS (idxtype)));
2897 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2898 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2899 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2900 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2901 op = var;
2904 if (mask)
2906 if (mask_perm_mask && (j & 1))
2907 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2908 mask_perm_mask, stmt_info, gsi);
2909 else
2911 if (j == 0)
2912 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info);
2913 else if (modifier != NARROW || (j & 1) == 0)
2914 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2915 vec_mask);
2917 mask_op = vec_mask;
2918 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2920 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2921 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2922 gcc_assert (known_eq (sub1, sub2));
2923 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2924 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2925 gassign *new_stmt
2926 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2927 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2928 mask_op = var;
2931 if (modifier == NARROW && masktype != real_masktype)
2933 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2934 gassign *new_stmt
2935 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2936 : VEC_UNPACK_LO_EXPR,
2937 mask_op);
2938 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2939 mask_op = var;
2941 src_op = mask_op;
2944 tree mask_arg = mask_op;
2945 if (masktype != real_masktype)
2947 tree utype, optype = TREE_TYPE (mask_op);
2948 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2949 utype = real_masktype;
2950 else
2951 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2952 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2953 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2954 gassign *new_stmt
2955 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2956 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2957 mask_arg = var;
2958 if (!useless_type_conversion_p (real_masktype, utype))
2960 gcc_assert (TYPE_PRECISION (utype)
2961 <= TYPE_PRECISION (real_masktype));
2962 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2963 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2964 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2965 mask_arg = var;
2967 src_op = build_zero_cst (srctype);
2969 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2970 mask_arg, scale);
2972 stmt_vec_info new_stmt_info;
2973 if (!useless_type_conversion_p (vectype, rettype))
2975 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2976 TYPE_VECTOR_SUBPARTS (rettype)));
2977 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2978 gimple_call_set_lhs (new_call, op);
2979 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
2980 var = make_ssa_name (vec_dest);
2981 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2982 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2983 new_stmt_info
2984 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2986 else
2988 var = make_ssa_name (vec_dest, new_call);
2989 gimple_call_set_lhs (new_call, var);
2990 new_stmt_info
2991 = vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
2994 if (modifier == NARROW)
2996 if ((j & 1) == 0)
2998 prev_res = var;
2999 continue;
3001 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
3002 stmt_info, gsi);
3003 new_stmt_info = loop_vinfo->lookup_def (var);
3006 if (prev_stmt_info == NULL)
3007 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3008 else
3009 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3010 prev_stmt_info = new_stmt_info;
3014 /* Prepare the base and offset in GS_INFO for vectorization.
3015 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3016 to the vectorized offset argument for the first copy of STMT_INFO.
3017 STMT_INFO is the statement described by GS_INFO and LOOP is the
3018 containing loop. */
3020 static void
3021 vect_get_gather_scatter_ops (vec_info *vinfo,
3022 class loop *loop, stmt_vec_info stmt_info,
3023 gather_scatter_info *gs_info,
3024 tree *dataref_ptr, tree *vec_offset)
3026 gimple_seq stmts = NULL;
3027 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3028 if (stmts != NULL)
3030 basic_block new_bb;
3031 edge pe = loop_preheader_edge (loop);
3032 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3033 gcc_assert (!new_bb);
3035 *vec_offset = vect_get_vec_def_for_operand (vinfo, gs_info->offset, stmt_info,
3036 gs_info->offset_vectype);
3039 /* Prepare to implement a grouped or strided load or store using
3040 the gather load or scatter store operation described by GS_INFO.
3041 STMT_INFO is the load or store statement.
3043 Set *DATAREF_BUMP to the amount that should be added to the base
3044 address after each copy of the vectorized statement. Set *VEC_OFFSET
3045 to an invariant offset vector in which element I has the value
3046 I * DR_STEP / SCALE. */
3048 static void
3049 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3050 loop_vec_info loop_vinfo,
3051 gather_scatter_info *gs_info,
3052 tree *dataref_bump, tree *vec_offset)
3054 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3055 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3056 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3057 gimple_seq stmts;
3059 tree bump = size_binop (MULT_EXPR,
3060 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3061 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3062 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
3063 if (stmts)
3064 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3066 /* The offset given in GS_INFO can have pointer type, so use the element
3067 type of the vector instead. */
3068 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3071 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3072 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3073 ssize_int (gs_info->scale));
3074 step = fold_convert (offset_type, step);
3075 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
3077 /* Create {0, X, X*2, X*3, ...}. */
3078 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
3079 build_zero_cst (offset_type), step);
3080 if (stmts)
3081 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
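/* Worked example (illustrative numbers): for a strided access with
   DR_STEP = 32 bytes, SCALE = 4 and 4-element vectors, X = 32 / 4 = 8, so
   *VEC_OFFSET becomes { 0, 8, 16, 24 } and *DATAREF_BUMP is 32 * 4 = 128
   bytes per copy of the vectorized statement.  */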
3084 /* Return the amount that should be added to a vector pointer to move
3085 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3086 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3087 vectorization. */
3089 static tree
3090 vect_get_data_ptr_increment (vec_info *vinfo,
3091 dr_vec_info *dr_info, tree aggr_type,
3092 vect_memory_access_type memory_access_type)
3094 if (memory_access_type == VMAT_INVARIANT)
3095 return size_zero_node;
3097 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3098 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3099 if (tree_int_cst_sgn (step) == -1)
3100 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3101 return iv_step;
3104 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3106 static bool
3107 vectorizable_bswap (vec_info *vinfo,
3108 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3109 stmt_vec_info *vec_stmt, slp_tree slp_node,
3110 tree vectype_in, stmt_vector_for_cost *cost_vec)
3112 tree op, vectype;
3113 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3114 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3115 unsigned ncopies;
3117 op = gimple_call_arg (stmt, 0);
3118 vectype = STMT_VINFO_VECTYPE (stmt_info);
3119 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3121 /* Multiple types in SLP are handled by creating the appropriate number of
3122 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3123 case of SLP. */
3124 if (slp_node)
3125 ncopies = 1;
3126 else
3127 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3129 gcc_assert (ncopies >= 1);
3131 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3132 if (! char_vectype)
3133 return false;
3135 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3136 unsigned word_bytes;
3137 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3138 return false;
3140 /* The encoding uses one stepped pattern for each byte in the word. */
3141 vec_perm_builder elts (num_bytes, word_bytes, 3);
3142 for (unsigned i = 0; i < 3; ++i)
3143 for (unsigned j = 0; j < word_bytes; ++j)
3144 elts.quick_push ((i + 1) * word_bytes - j - 1);
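/* For example, for __builtin_bswap32 on V4SI with a V16QI CHAR_VECTYPE,
   WORD_BYTES is 4 and the three stepped patterns expand to the selector
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. a byte
   reversal within each 4-byte word.  */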
3146 vec_perm_indices indices (elts, 1, num_bytes);
3147 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3148 return false;
3150 if (! vec_stmt)
3152 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3153 DUMP_VECT_SCOPE ("vectorizable_bswap");
3154 if (! slp_node)
3156 record_stmt_cost (cost_vec,
3157 1, vector_stmt, stmt_info, 0, vect_prologue);
3158 record_stmt_cost (cost_vec,
3159 ncopies, vec_perm, stmt_info, 0, vect_body);
3161 return true;
3164 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3166 /* Transform. */
3167 vec<tree> vec_oprnds = vNULL;
3168 stmt_vec_info new_stmt_info = NULL;
3169 stmt_vec_info prev_stmt_info = NULL;
3170 for (unsigned j = 0; j < ncopies; j++)
3172 /* Handle uses. */
3173 if (j == 0)
3174 vect_get_vec_defs (vinfo, op, NULL, stmt_info, &vec_oprnds, NULL,
3175 slp_node);
3176 else
3177 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3179 /* Arguments are ready. Create the new vector stmt. */
3180 unsigned i;
3181 tree vop;
3182 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3184 gimple *new_stmt;
3185 tree tem = make_ssa_name (char_vectype);
3186 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3187 char_vectype, vop));
3188 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3189 tree tem2 = make_ssa_name (char_vectype);
3190 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3191 tem, tem, bswap_vconst);
3192 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3193 tem = make_ssa_name (vectype);
3194 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3195 vectype, tem2));
3196 new_stmt_info
3197 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3198 if (slp_node)
3199 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3202 if (slp_node)
3203 continue;
3205 if (j == 0)
3206 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3207 else
3208 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3210 prev_stmt_info = new_stmt_info;
3213 vec_oprnds.release ();
3214 return true;
3217 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3218 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3219 in a single step. On success, store the binary pack code in
3220 *CONVERT_CODE. */
3222 static bool
3223 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3224 tree_code *convert_code)
3226 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3227 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3228 return false;
3230 tree_code code;
3231 int multi_step_cvt = 0;
3232 auto_vec <tree, 8> interm_types;
3233 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3234 &code, &multi_step_cvt, &interm_types)
3235 || multi_step_cvt)
3236 return false;
3238 *convert_code = code;
3239 return true;
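/* For example (an illustrative case): narrowing V4DI inputs to V8SI results
   can be done in one step with VEC_PACK_TRUNC_EXPR, so *CONVERT_CODE is set
   to that code; a narrowing that would need several steps (MULTI_STEP_CVT
   nonzero) is rejected.  */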
3242 /* Function vectorizable_call.
3244 Check if STMT_INFO performs a function call that can be vectorized.
3245 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3246 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3247 Return true if STMT_INFO is vectorizable in this way. */
3249 static bool
3250 vectorizable_call (vec_info *vinfo,
3251 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3252 stmt_vec_info *vec_stmt, slp_tree slp_node,
3253 stmt_vector_for_cost *cost_vec)
3255 gcall *stmt;
3256 tree vec_dest;
3257 tree scalar_dest;
3258 tree op;
3259 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3260 stmt_vec_info prev_stmt_info;
3261 tree vectype_out, vectype_in;
3262 poly_uint64 nunits_in;
3263 poly_uint64 nunits_out;
3264 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3265 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3266 tree fndecl, new_temp, rhs_type;
3267 enum vect_def_type dt[4]
3268 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3269 vect_unknown_def_type };
3270 tree vectypes[ARRAY_SIZE (dt)] = {};
3271 int ndts = ARRAY_SIZE (dt);
3272 int ncopies, j;
3273 auto_vec<tree, 8> vargs;
3274 auto_vec<tree, 8> orig_vargs;
3275 enum { NARROW, NONE, WIDEN } modifier;
3276 size_t i, nargs;
3277 tree lhs;
3279 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3280 return false;
3282 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3283 && ! vec_stmt)
3284 return false;
3286 /* Is STMT_INFO a vectorizable call? */
3287 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3288 if (!stmt)
3289 return false;
3291 if (gimple_call_internal_p (stmt)
3292 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3293 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3294 /* Handled by vectorizable_load and vectorizable_store. */
3295 return false;
3297 if (gimple_call_lhs (stmt) == NULL_TREE
3298 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3299 return false;
3301 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3303 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3305 /* Process function arguments. */
3306 rhs_type = NULL_TREE;
3307 vectype_in = NULL_TREE;
3308 nargs = gimple_call_num_args (stmt);
3310 /* Bail out if the function has more than four arguments; we do not have
3311 interesting builtin functions to vectorize with more than two arguments
3312 except for fma. No arguments is also not good. */
3313 if (nargs == 0 || nargs > 4)
3314 return false;
3316 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3317 combined_fn cfn = gimple_call_combined_fn (stmt);
3318 if (cfn == CFN_GOMP_SIMD_LANE)
3320 nargs = 0;
3321 rhs_type = unsigned_type_node;
3324 int mask_opno = -1;
3325 if (internal_fn_p (cfn))
3326 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3328 for (i = 0; i < nargs; i++)
3330 op = gimple_call_arg (stmt, i);
3332 if ((int) i == mask_opno)
3334 if (!vect_check_scalar_mask (vinfo,
3335 stmt_info, op, &dt[i], &vectypes[i]))
3336 return false;
3337 continue;
3340 if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3342 if (dump_enabled_p ())
3343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3344 "use not simple.\n");
3345 return false;
3348 /* We can only handle calls with arguments of the same type. */
3349 if (rhs_type
3350 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3352 if (dump_enabled_p ())
3353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3354 "argument types differ.\n");
3355 return false;
3357 if (!rhs_type)
3358 rhs_type = TREE_TYPE (op);
3360 if (!vectype_in)
3361 vectype_in = vectypes[i];
3362 else if (vectypes[i]
3363 && !types_compatible_p (vectypes[i], vectype_in))
3365 if (dump_enabled_p ())
3366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3367 "argument vector types differ.\n");
3368 return false;
3371 /* If all arguments are external or constant defs, infer the vector type
3372 from the scalar type. */
3373 if (!vectype_in)
3374 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3375 if (vec_stmt)
3376 gcc_assert (vectype_in);
3377 if (!vectype_in)
3379 if (dump_enabled_p ())
3380 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3381 "no vectype for scalar type %T\n", rhs_type);
3383 return false;
3385 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3386 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3387 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3388 by a pack of the two vectors into an SI vector. We would need
3389 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3390 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3392 if (dump_enabled_p ())
3393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3394 "mismatched vector sizes %T and %T\n",
3395 vectype_in, vectype_out);
3396 return false;
3399 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3400 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3402 if (dump_enabled_p ())
3403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3404 "mixed mask and nonmask vector types\n");
3405 return false;
3408 /* FORNOW */
3409 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3410 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3411 if (known_eq (nunits_in * 2, nunits_out))
3412 modifier = NARROW;
3413 else if (known_eq (nunits_out, nunits_in))
3414 modifier = NONE;
3415 else if (known_eq (nunits_out * 2, nunits_in))
3416 modifier = WIDEN;
3417 else
3418 return false;
3420 /* We only handle functions that do not read or clobber memory. */
3421 if (gimple_vuse (stmt))
3423 if (dump_enabled_p ())
3424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3425 "function reads from or writes to memory.\n");
3426 return false;
3429 /* For now, we only vectorize functions if a target specific builtin
3430 is available. TODO -- in some cases, it might be profitable to
3431 insert the calls for pieces of the vector, in order to be able
3432 to vectorize other operations in the loop. */
3433 fndecl = NULL_TREE;
3434 internal_fn ifn = IFN_LAST;
3435 tree callee = gimple_call_fndecl (stmt);
3437 /* First try using an internal function. */
3438 tree_code convert_code = ERROR_MARK;
3439 if (cfn != CFN_LAST
3440 && (modifier == NONE
3441 || (modifier == NARROW
3442 && simple_integer_narrowing (vectype_out, vectype_in,
3443 &convert_code))))
3444 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3445 vectype_in);
3447 /* If that fails, try asking for a target-specific built-in function. */
3448 if (ifn == IFN_LAST)
3450 if (cfn != CFN_LAST)
3451 fndecl = targetm.vectorize.builtin_vectorized_function
3452 (cfn, vectype_out, vectype_in);
3453 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3454 fndecl = targetm.vectorize.builtin_md_vectorized_function
3455 (callee, vectype_out, vectype_in);
3458 if (ifn == IFN_LAST && !fndecl)
3460 if (cfn == CFN_GOMP_SIMD_LANE
3461 && !slp_node
3462 && loop_vinfo
3463 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3464 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3465 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3466 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3468 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3469 { 0, 1, 2, ... vf - 1 } vector. */
3470 gcc_assert (nargs == 0);
3472 else if (modifier == NONE
3473 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3474 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3475 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3476 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3477 vectype_in, cost_vec);
3478 else
3480 if (dump_enabled_p ())
3481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3482 "function is not vectorizable.\n");
3483 return false;
3487 if (slp_node)
3488 ncopies = 1;
3489 else if (modifier == NARROW && ifn == IFN_LAST)
3490 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3491 else
3492 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3494 /* Sanity check: make sure that at least one copy of the vectorized stmt
3495 needs to be generated. */
3496 gcc_assert (ncopies >= 1);
3498 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3499 if (!vec_stmt) /* transformation not required. */
3501 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3502 DUMP_VECT_SCOPE ("vectorizable_call");
3503 vect_model_simple_cost (vinfo, stmt_info,
3504 ncopies, dt, ndts, slp_node, cost_vec);
3505 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3506 record_stmt_cost (cost_vec, ncopies / 2,
3507 vec_promote_demote, stmt_info, 0, vect_body);
3509 if (loop_vinfo && mask_opno >= 0)
3511 unsigned int nvectors = (slp_node
3512 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3513 : ncopies);
3514 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3515 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3516 vectype_out, scalar_mask);
3518 return true;
3521 /* Transform. */
3523 if (dump_enabled_p ())
3524 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3526 /* Handle def. */
3527 scalar_dest = gimple_call_lhs (stmt);
3528 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3530 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3532 stmt_vec_info new_stmt_info = NULL;
3533 prev_stmt_info = NULL;
3534 if (modifier == NONE || ifn != IFN_LAST)
3536 tree prev_res = NULL_TREE;
3537 vargs.safe_grow (nargs);
3538 orig_vargs.safe_grow (nargs);
3539 for (j = 0; j < ncopies; ++j)
3541 /* Build argument list for the vectorized call. */
3542 if (slp_node)
3544 auto_vec<vec<tree> > vec_defs (nargs);
3545 vec<tree> vec_oprnds0;
3547 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3548 vec_oprnds0 = vec_defs[0];
3550 /* Arguments are ready. Create the new vector stmt. */
3551 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3553 size_t k;
3554 for (k = 0; k < nargs; k++)
3556 vec<tree> vec_oprndsk = vec_defs[k];
3557 vargs[k] = vec_oprndsk[i];
3559 if (modifier == NARROW)
3561 /* We don't define any narrowing conditional functions
3562 at present. */
3563 gcc_assert (mask_opno < 0);
3564 tree half_res = make_ssa_name (vectype_in);
3565 gcall *call
3566 = gimple_build_call_internal_vec (ifn, vargs);
3567 gimple_call_set_lhs (call, half_res);
3568 gimple_call_set_nothrow (call, true);
3569 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3570 if ((i & 1) == 0)
3572 prev_res = half_res;
3573 continue;
3575 new_temp = make_ssa_name (vec_dest);
3576 gimple *new_stmt
3577 = gimple_build_assign (new_temp, convert_code,
3578 prev_res, half_res);
3579 new_stmt_info
3580 = vect_finish_stmt_generation (vinfo, stmt_info,
3581 new_stmt, gsi);
3583 else
3585 if (mask_opno >= 0 && masked_loop_p)
3587 unsigned int vec_num = vec_oprnds0.length ();
3588 /* Always true for SLP. */
3589 gcc_assert (ncopies == 1);
3590 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3591 vectype_out, i);
3592 vargs[mask_opno] = prepare_load_store_mask
3593 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
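/* A minimal sketch of the statement just emitted, with hypothetical SSA
   names: prepare_load_store_mask ANDs the call's own mask operand with
   the loop mask guarding this vector iteration,

     vec_mask_and_42 = loop_mask_17 & vect_cond_mask_9;

   so lanes disabled by full-loop masking stay inactive in the call.  */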
3596 gcall *call;
3597 if (ifn != IFN_LAST)
3598 call = gimple_build_call_internal_vec (ifn, vargs);
3599 else
3600 call = gimple_build_call_vec (fndecl, vargs);
3601 new_temp = make_ssa_name (vec_dest, call);
3602 gimple_call_set_lhs (call, new_temp);
3603 gimple_call_set_nothrow (call, true);
3604 new_stmt_info
3605 = vect_finish_stmt_generation (vinfo, stmt_info,
3606 call, gsi);
3608 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3611 for (i = 0; i < nargs; i++)
3613 vec<tree> vec_oprndsi = vec_defs[i];
3614 vec_oprndsi.release ();
3616 continue;
3619 for (i = 0; i < nargs; i++)
3621 op = gimple_call_arg (stmt, i);
3622 if (j == 0)
3623 vec_oprnd0
3624 = vect_get_vec_def_for_operand (vinfo,
3625 op, stmt_info, vectypes[i]);
3626 else
3627 vec_oprnd0
3628 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3630 orig_vargs[i] = vargs[i] = vec_oprnd0;
3633 if (mask_opno >= 0 && masked_loop_p)
3635 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3636 vectype_out, j);
3637 vargs[mask_opno]
3638 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3639 vargs[mask_opno], gsi);
3642 if (cfn == CFN_GOMP_SIMD_LANE)
3644 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3645 tree new_var
3646 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3647 gimple *init_stmt = gimple_build_assign (new_var, cst);
3648 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3649 new_temp = make_ssa_name (vec_dest);
3650 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3651 new_stmt_info
3652 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
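/* Hypothetical illustration: with vectype_out = V4SI and ncopies = 2
   (vectorization factor 8), copy j == 0 materializes { 0, 1, 2, 3 } and
   copy j == 1 materializes { 4, 5, 6, 7 }, i.e. build_index_vector
   (vectype_out, j * nunits_out, 1) enumerates the scalar lanes that
   copy J covers.  */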
3654 else if (modifier == NARROW)
3656 /* We don't define any narrowing conditional functions at
3657 present. */
3658 gcc_assert (mask_opno < 0);
3659 tree half_res = make_ssa_name (vectype_in);
3660 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3661 gimple_call_set_lhs (call, half_res);
3662 gimple_call_set_nothrow (call, true);
3663 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3664 if ((j & 1) == 0)
3666 prev_res = half_res;
3667 continue;
3669 new_temp = make_ssa_name (vec_dest);
3670 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3671 prev_res, half_res);
3672 new_stmt_info
3673 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
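/* Illustrative sketch of the pairing above, with hypothetical modes: for
   a DI->SI call such as int x = __builtin_ctzll (a), with
   vectype_in = V2DI and vectype_out = V4SI, consecutive copies pair up:

     half_res_1 = .CTZ (vect_a_1);   <- j == 0, kept in prev_res
     half_res_2 = .CTZ (vect_a_2);   <- j == 1
     vect_x_1 = VEC_PACK_TRUNC_EXPR <half_res_1, half_res_2>;

   CONVERT_CODE is whatever simple_integer_narrowing chose, typically a
   vector pack as shown.  */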
3675 else
3677 gcall *call;
3678 if (ifn != IFN_LAST)
3679 call = gimple_build_call_internal_vec (ifn, vargs);
3680 else
3681 call = gimple_build_call_vec (fndecl, vargs);
3682 new_temp = make_ssa_name (vec_dest, call);
3683 gimple_call_set_lhs (call, new_temp);
3684 gimple_call_set_nothrow (call, true);
3685 new_stmt_info
3686 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3689 if (j == (modifier == NARROW ? 1 : 0))
3690 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3691 else
3692 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3694 prev_stmt_info = new_stmt_info;
3697 else if (modifier == NARROW)
3699 /* We don't define any narrowing conditional functions at present. */
3700 gcc_assert (mask_opno < 0);
3701 for (j = 0; j < ncopies; ++j)
3703 /* Build argument list for the vectorized call. */
3704 if (j == 0)
3705 vargs.create (nargs * 2);
3706 else
3707 vargs.truncate (0);
3709 if (slp_node)
3711 auto_vec<vec<tree> > vec_defs (nargs);
3712 vec<tree> vec_oprnds0;
3714 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3715 vec_oprnds0 = vec_defs[0];
3717 /* Arguments are ready. Create the new vector stmt. */
3718 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3720 size_t k;
3721 vargs.truncate (0);
3722 for (k = 0; k < nargs; k++)
3724 vec<tree> vec_oprndsk = vec_defs[k];
3725 vargs.quick_push (vec_oprndsk[i]);
3726 vargs.quick_push (vec_oprndsk[i + 1]);
3728 gcall *call;
3729 if (ifn != IFN_LAST)
3730 call = gimple_build_call_internal_vec (ifn, vargs);
3731 else
3732 call = gimple_build_call_vec (fndecl, vargs);
3733 new_temp = make_ssa_name (vec_dest, call);
3734 gimple_call_set_lhs (call, new_temp);
3735 gimple_call_set_nothrow (call, true);
3736 new_stmt_info
3737 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3738 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3741 for (i = 0; i < nargs; i++)
3743 vec<tree> vec_oprndsi = vec_defs[i];
3744 vec_oprndsi.release ();
3746 continue;
3749 for (i = 0; i < nargs; i++)
3751 op = gimple_call_arg (stmt, i);
3752 if (j == 0)
3754 vec_oprnd0
3755 = vect_get_vec_def_for_operand (vinfo, op, stmt_info,
3756 vectypes[i]);
3757 vec_oprnd1
3758 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3760 else
3762 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3763 2 * i + 1);
3764 vec_oprnd0
3765 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3766 vec_oprnd1
3767 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3770 vargs.quick_push (vec_oprnd0);
3771 vargs.quick_push (vec_oprnd1);
3774 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3775 new_temp = make_ssa_name (vec_dest, new_stmt);
3776 gimple_call_set_lhs (new_stmt, new_temp);
3777 new_stmt_info
3778 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3780 if (j == 0)
3781 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3782 else
3783 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3785 prev_stmt_info = new_stmt_info;
3788 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3790 else
3791 /* No current target implements this case. */
3792 return false;
3794 vargs.release ();
3796 /* The call in STMT might prevent it from being removed in dce.
3797 We however cannot remove it here, due to the way the ssa name
3798 it defines is mapped to the new definition. So just replace
3799 rhs of the statement with something harmless. */
3801 if (slp_node)
3802 return true;
3804 stmt_info = vect_orig_stmt (stmt_info);
3805 lhs = gimple_get_lhs (stmt_info->stmt);
3807 gassign *new_stmt
3808 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3809 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3811 return true;
3815 struct simd_call_arg_info
3817 tree vectype;
3818 tree op;
3819 HOST_WIDE_INT linear_step;
3820 enum vect_def_type dt;
3821 unsigned int align;
3822 bool simd_lane_linear;
3825 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3826 is linear within simd lane (but not within whole loop), note it in
3827 *ARGINFO. */
3829 static void
3830 vect_simd_lane_linear (tree op, class loop *loop,
3831 struct simd_call_arg_info *arginfo)
3833 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3835 if (!is_gimple_assign (def_stmt)
3836 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3837 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3838 return;
3840 tree base = gimple_assign_rhs1 (def_stmt);
3841 HOST_WIDE_INT linear_step = 0;
3842 tree v = gimple_assign_rhs2 (def_stmt);
3843 while (TREE_CODE (v) == SSA_NAME)
3845 tree t;
3846 def_stmt = SSA_NAME_DEF_STMT (v);
3847 if (is_gimple_assign (def_stmt))
3848 switch (gimple_assign_rhs_code (def_stmt))
3850 case PLUS_EXPR:
3851 t = gimple_assign_rhs2 (def_stmt);
3852 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3853 return;
3854 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3855 v = gimple_assign_rhs1 (def_stmt);
3856 continue;
3857 case MULT_EXPR:
3858 t = gimple_assign_rhs2 (def_stmt);
3859 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3860 return;
3861 linear_step = tree_to_shwi (t);
3862 v = gimple_assign_rhs1 (def_stmt);
3863 continue;
3864 CASE_CONVERT:
3865 t = gimple_assign_rhs1 (def_stmt);
3866 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3867 || (TYPE_PRECISION (TREE_TYPE (v))
3868 < TYPE_PRECISION (TREE_TYPE (t))))
3869 return;
3870 if (!linear_step)
3871 linear_step = 1;
3872 v = t;
3873 continue;
3874 default:
3875 return;
3877 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3878 && loop->simduid
3879 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3880 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3881 == loop->simduid))
3883 if (!linear_step)
3884 linear_step = 1;
3885 arginfo->linear_step = linear_step;
3886 arginfo->op = base;
3887 arginfo->simd_lane_linear = true;
3888 return;
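/* As a hypothetical example of the pattern recognized above, for

     _1 = GOMP_SIMD_LANE (simduid.0_5);
     _2 = _1 * 4;
     _3 = (sizetype) _2;
     op_4 = &a + _3;

   the walk terminates at the IFN_GOMP_SIMD_LANE call and records
   ARGINFO->op = &a, ARGINFO->linear_step = 4 and
   ARGINFO->simd_lane_linear = true.  */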
3893 /* Return the number of elements in vector type VECTYPE, which is associated
3894 with a SIMD clone. At present these vectors always have a constant
3895 length. */
3897 static unsigned HOST_WIDE_INT
3898 simd_clone_subparts (tree vectype)
3900 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3903 /* Function vectorizable_simd_clone_call.
3905 Check if STMT_INFO performs a function call that can be vectorized
3906 by calling a simd clone of the function.
3907 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3908 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3909 Return true if STMT_INFO is vectorizable in this way. */
3911 static bool
3912 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3913 gimple_stmt_iterator *gsi,
3914 stmt_vec_info *vec_stmt, slp_tree slp_node,
3915 stmt_vector_for_cost *)
3917 tree vec_dest;
3918 tree scalar_dest;
3919 tree op, type;
3920 tree vec_oprnd0 = NULL_TREE;
3921 stmt_vec_info prev_stmt_info;
3922 tree vectype;
3923 unsigned int nunits;
3924 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3925 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3926 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3927 tree fndecl, new_temp;
3928 int ncopies, j;
3929 auto_vec<simd_call_arg_info> arginfo;
3930 vec<tree> vargs = vNULL;
3931 size_t i, nargs;
3932 tree lhs, rtype, ratype;
3933 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3935 /* Is STMT a vectorizable call? */
3936 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3937 if (!stmt)
3938 return false;
3940 fndecl = gimple_call_fndecl (stmt);
3941 if (fndecl == NULL_TREE)
3942 return false;
3944 struct cgraph_node *node = cgraph_node::get (fndecl);
3945 if (node == NULL || node->simd_clones == NULL)
3946 return false;
3948 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3949 return false;
3951 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3952 && ! vec_stmt)
3953 return false;
3955 if (gimple_call_lhs (stmt)
3956 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3957 return false;
3959 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3961 vectype = STMT_VINFO_VECTYPE (stmt_info);
3963 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3964 return false;
3966 /* FORNOW */
3967 if (slp_node)
3968 return false;
3970 /* Process function arguments. */
3971 nargs = gimple_call_num_args (stmt);
3973 /* Bail out if the function has zero arguments. */
3974 if (nargs == 0)
3975 return false;
3977 arginfo.reserve (nargs, true);
3979 for (i = 0; i < nargs; i++)
3981 simd_call_arg_info thisarginfo;
3982 affine_iv iv;
3984 thisarginfo.linear_step = 0;
3985 thisarginfo.align = 0;
3986 thisarginfo.op = NULL_TREE;
3987 thisarginfo.simd_lane_linear = false;
3989 op = gimple_call_arg (stmt, i);
3990 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3991 &thisarginfo.vectype)
3992 || thisarginfo.dt == vect_uninitialized_def)
3994 if (dump_enabled_p ())
3995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3996 "use not simple.\n");
3997 return false;
4000 if (thisarginfo.dt == vect_constant_def
4001 || thisarginfo.dt == vect_external_def)
4002 gcc_assert (thisarginfo.vectype == NULL_TREE);
4003 else
4005 gcc_assert (thisarginfo.vectype != NULL_TREE);
4006 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
4008 if (dump_enabled_p ())
4009 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4010 "vector mask arguments are not supported\n");
4011 return false;
4015 /* For linear arguments, the analyze phase should have saved
4016 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4017 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4018 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4020 gcc_assert (vec_stmt);
4021 thisarginfo.linear_step
4022 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4023 thisarginfo.op
4024 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4025 thisarginfo.simd_lane_linear
4026 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4027 == boolean_true_node);
4028 /* If the loop has been peeled for alignment, we need to adjust it. */
4029 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4030 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4031 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4033 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4034 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4035 tree opt = TREE_TYPE (thisarginfo.op);
4036 bias = fold_convert (TREE_TYPE (step), bias);
4037 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4038 thisarginfo.op
4039 = fold_build2 (POINTER_TYPE_P (opt)
4040 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4041 thisarginfo.op, bias);
4044 else if (!vec_stmt
4045 && thisarginfo.dt != vect_constant_def
4046 && thisarginfo.dt != vect_external_def
4047 && loop_vinfo
4048 && TREE_CODE (op) == SSA_NAME
4049 && simple_iv (loop, loop_containing_stmt (stmt), op,
4050 &iv, false)
4051 && tree_fits_shwi_p (iv.step))
4053 thisarginfo.linear_step = tree_to_shwi (iv.step);
4054 thisarginfo.op = iv.base;
4056 else if ((thisarginfo.dt == vect_constant_def
4057 || thisarginfo.dt == vect_external_def)
4058 && POINTER_TYPE_P (TREE_TYPE (op)))
4059 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4060 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4061 linear too. */
4062 if (POINTER_TYPE_P (TREE_TYPE (op))
4063 && !thisarginfo.linear_step
4064 && !vec_stmt
4065 && thisarginfo.dt != vect_constant_def
4066 && thisarginfo.dt != vect_external_def
4067 && loop_vinfo
4068 && !slp_node
4069 && TREE_CODE (op) == SSA_NAME)
4070 vect_simd_lane_linear (op, loop, &thisarginfo);
4072 arginfo.quick_push (thisarginfo);
4075 unsigned HOST_WIDE_INT vf;
4076 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
4078 if (dump_enabled_p ())
4079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4080 "not considering SIMD clones; not yet supported"
4081 " for variable-width vectors.\n");
4082 return false;
4085 unsigned int badness = 0;
4086 struct cgraph_node *bestn = NULL;
4087 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4088 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4089 else
4090 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4091 n = n->simdclone->next_clone)
4093 unsigned int this_badness = 0;
4094 if (n->simdclone->simdlen > vf
4095 || n->simdclone->nargs != nargs)
4096 continue;
4097 if (n->simdclone->simdlen < vf)
4098 this_badness += (exact_log2 (vf)
4099 - exact_log2 (n->simdclone->simdlen)) * 1024;
4100 if (n->simdclone->inbranch)
4101 this_badness += 2048;
4102 int target_badness = targetm.simd_clone.usable (n);
4103 if (target_badness < 0)
4104 continue;
4105 this_badness += target_badness * 512;
4106 /* FORNOW: Have to add code to add the mask argument. */
4107 if (n->simdclone->inbranch)
4108 continue;
4109 for (i = 0; i < nargs; i++)
4111 switch (n->simdclone->args[i].arg_type)
4113 case SIMD_CLONE_ARG_TYPE_VECTOR:
4114 if (!useless_type_conversion_p
4115 (n->simdclone->args[i].orig_type,
4116 TREE_TYPE (gimple_call_arg (stmt, i))))
4117 i = -1;
4118 else if (arginfo[i].dt == vect_constant_def
4119 || arginfo[i].dt == vect_external_def
4120 || arginfo[i].linear_step)
4121 this_badness += 64;
4122 break;
4123 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4124 if (arginfo[i].dt != vect_constant_def
4125 && arginfo[i].dt != vect_external_def)
4126 i = -1;
4127 break;
4128 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4129 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4130 if (arginfo[i].dt == vect_constant_def
4131 || arginfo[i].dt == vect_external_def
4132 || (arginfo[i].linear_step
4133 != n->simdclone->args[i].linear_step))
4134 i = -1;
4135 break;
4136 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4137 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4138 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4139 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4140 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4141 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4142 /* FORNOW */
4143 i = -1;
4144 break;
4145 case SIMD_CLONE_ARG_TYPE_MASK:
4146 gcc_unreachable ();
4148 if (i == (size_t) -1)
4149 break;
4150 if (n->simdclone->args[i].alignment > arginfo[i].align)
4152 i = -1;
4153 break;
4155 if (arginfo[i].align)
4156 this_badness += (exact_log2 (arginfo[i].align)
4157 - exact_log2 (n->simdclone->args[i].alignment));
4159 if (i == (size_t) -1)
4160 continue;
4161 if (bestn == NULL || this_badness < badness)
4163 bestn = n;
4164 badness = this_badness;
4168 if (bestn == NULL)
4169 return false;
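/* Illustrative scoring, with made-up numbers: for vf == 8, a clone with
   simdlen 4 starts with (exact_log2 (8) - exact_log2 (4)) * 1024 == 1024
   more badness than a simdlen 8 clone of the same function, so the
   simdlen 8 clone wins unless the per-argument penalties (64 for a vector
   argument that is invariant or linear) or the target_badness * 512 term
   tip the balance the other way.  */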
4171 for (i = 0; i < nargs; i++)
4172 if ((arginfo[i].dt == vect_constant_def
4173 || arginfo[i].dt == vect_external_def)
4174 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4176 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4177 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4178 slp_node);
4179 if (arginfo[i].vectype == NULL
4180 || (simd_clone_subparts (arginfo[i].vectype)
4181 > bestn->simdclone->simdlen))
4182 return false;
4185 fndecl = bestn->decl;
4186 nunits = bestn->simdclone->simdlen;
4187 ncopies = vf / nunits;
4189 /* If the function isn't const, only allow it in simd loops where the user
4190 has asserted that at least nunits consecutive iterations can be
4191 performed using SIMD instructions. */
4192 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4193 && gimple_vuse (stmt))
4194 return false;
4196 /* Sanity check: make sure that at least one copy of the vectorized stmt
4197 needs to be generated. */
4198 gcc_assert (ncopies >= 1);
4200 if (!vec_stmt) /* transformation not required. */
4202 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4203 for (i = 0; i < nargs; i++)
4204 if ((bestn->simdclone->args[i].arg_type
4205 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4206 || (bestn->simdclone->args[i].arg_type
4207 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4209 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4210 + 1);
4211 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4212 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4213 ? size_type_node : TREE_TYPE (arginfo[i].op);
4214 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4215 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4216 tree sll = arginfo[i].simd_lane_linear
4217 ? boolean_true_node : boolean_false_node;
4218 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4220 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4221 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4222 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4223 dt, slp_node, cost_vec); */
4224 return true;
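/* A sketch of the layout recorded above: STMT_VINFO_SIMD_CLONE_INFO is
     { bestn->decl,
       op_0, step_0, simd_lane_linear_0,
       op_1, step_1, simd_lane_linear_1, ... }
   where the triple for argument I sits at indices 3*I+1 .. 3*I+3 and is
   only filled in for LINEAR_*_CONSTANT_STEP arguments; the transform
   phase reads it back through the i * 3 + 2 check earlier in this
   function.  */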
4227 /* Transform. */
4229 if (dump_enabled_p ())
4230 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4232 /* Handle def. */
4233 scalar_dest = gimple_call_lhs (stmt);
4234 vec_dest = NULL_TREE;
4235 rtype = NULL_TREE;
4236 ratype = NULL_TREE;
4237 if (scalar_dest)
4239 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4240 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4241 if (TREE_CODE (rtype) == ARRAY_TYPE)
4243 ratype = rtype;
4244 rtype = TREE_TYPE (ratype);
4248 prev_stmt_info = NULL;
4249 for (j = 0; j < ncopies; ++j)
4251 /* Build argument list for the vectorized call. */
4252 if (j == 0)
4253 vargs.create (nargs);
4254 else
4255 vargs.truncate (0);
4257 for (i = 0; i < nargs; i++)
4259 unsigned int k, l, m, o;
4260 tree atype;
4261 op = gimple_call_arg (stmt, i);
4262 switch (bestn->simdclone->args[i].arg_type)
4264 case SIMD_CLONE_ARG_TYPE_VECTOR:
4265 atype = bestn->simdclone->args[i].vector_type;
4266 o = nunits / simd_clone_subparts (atype);
4267 for (m = j * o; m < (j + 1) * o; m++)
4269 if (simd_clone_subparts (atype)
4270 < simd_clone_subparts (arginfo[i].vectype))
4272 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4273 k = (simd_clone_subparts (arginfo[i].vectype)
4274 / simd_clone_subparts (atype));
4275 gcc_assert ((k & (k - 1)) == 0);
4276 if (m == 0)
4277 vec_oprnd0
4278 = vect_get_vec_def_for_operand (vinfo, op, stmt_info);
4279 else
4281 vec_oprnd0 = arginfo[i].op;
4282 if ((m & (k - 1)) == 0)
4283 vec_oprnd0
4284 = vect_get_vec_def_for_stmt_copy (vinfo,
4285 vec_oprnd0);
4287 arginfo[i].op = vec_oprnd0;
4288 vec_oprnd0
4289 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4290 bitsize_int (prec),
4291 bitsize_int ((m & (k - 1)) * prec));
4292 gassign *new_stmt
4293 = gimple_build_assign (make_ssa_name (atype),
4294 vec_oprnd0);
4295 vect_finish_stmt_generation (vinfo, stmt_info,
4296 new_stmt, gsi);
4297 vargs.safe_push (gimple_assign_lhs (new_stmt));
4299 else
4301 k = (simd_clone_subparts (atype)
4302 / simd_clone_subparts (arginfo[i].vectype));
4303 gcc_assert ((k & (k - 1)) == 0);
4304 vec<constructor_elt, va_gc> *ctor_elts;
4305 if (k != 1)
4306 vec_alloc (ctor_elts, k);
4307 else
4308 ctor_elts = NULL;
4309 for (l = 0; l < k; l++)
4311 if (m == 0 && l == 0)
4312 vec_oprnd0
4313 = vect_get_vec_def_for_operand (vinfo,
4314 op, stmt_info);
4315 else
4316 vec_oprnd0
4317 = vect_get_vec_def_for_stmt_copy (vinfo,
4318 arginfo[i].op);
4319 arginfo[i].op = vec_oprnd0;
4320 if (k == 1)
4321 break;
4322 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4323 vec_oprnd0);
4325 if (k == 1)
4326 vargs.safe_push (vec_oprnd0);
4327 else
4329 vec_oprnd0 = build_constructor (atype, ctor_elts);
4330 gassign *new_stmt
4331 = gimple_build_assign (make_ssa_name (atype),
4332 vec_oprnd0);
4333 vect_finish_stmt_generation (vinfo, stmt_info,
4334 new_stmt, gsi);
4335 vargs.safe_push (gimple_assign_lhs (new_stmt));
4339 break;
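/* Two hypothetical shapes of the argument setup above, for an int
   argument A:
   - clone ATYPE V8SI, loop vectype V4SI (k == 2): each clone call gets
     one merged operand, _1 = {vect_a_1, vect_a_2};
   - clone ATYPE V4SI, loop vectype V8SI (k == 2 the other way round):
     each loop vector is split and consecutive clone calls receive
     _2 = BIT_FIELD_REF <vect_a_1, 128, 0> and
     _3 = BIT_FIELD_REF <vect_a_1, 128, 128> respectively.  */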
4340 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4341 vargs.safe_push (op);
4342 break;
4343 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4344 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4345 if (j == 0)
4347 gimple_seq stmts;
4348 arginfo[i].op
4349 = force_gimple_operand (unshare_expr (arginfo[i].op),
4350 &stmts, true, NULL_TREE);
4351 if (stmts != NULL)
4353 basic_block new_bb;
4354 edge pe = loop_preheader_edge (loop);
4355 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4356 gcc_assert (!new_bb);
4358 if (arginfo[i].simd_lane_linear)
4360 vargs.safe_push (arginfo[i].op);
4361 break;
4363 tree phi_res = copy_ssa_name (op);
4364 gphi *new_phi = create_phi_node (phi_res, loop->header);
4365 loop_vinfo->add_stmt (new_phi);
4366 add_phi_arg (new_phi, arginfo[i].op,
4367 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4368 enum tree_code code
4369 = POINTER_TYPE_P (TREE_TYPE (op))
4370 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4371 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4372 ? sizetype : TREE_TYPE (op);
4373 widest_int cst
4374 = wi::mul (bestn->simdclone->args[i].linear_step,
4375 ncopies * nunits);
4376 tree tcst = wide_int_to_tree (type, cst);
4377 tree phi_arg = copy_ssa_name (op);
4378 gassign *new_stmt
4379 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4380 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4381 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4382 loop_vinfo->add_stmt (new_stmt);
4383 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4384 UNKNOWN_LOCATION);
4385 arginfo[i].op = phi_res;
4386 vargs.safe_push (phi_res);
4388 else
4390 enum tree_code code
4391 = POINTER_TYPE_P (TREE_TYPE (op))
4392 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4393 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4394 ? sizetype : TREE_TYPE (op);
4395 widest_int cst
4396 = wi::mul (bestn->simdclone->args[i].linear_step,
4397 j * nunits);
4398 tree tcst = wide_int_to_tree (type, cst);
4399 new_temp = make_ssa_name (TREE_TYPE (op));
4400 gassign *new_stmt
4401 = gimple_build_assign (new_temp, code,
4402 arginfo[i].op, tcst);
4403 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4404 vargs.safe_push (new_temp);
4406 break;
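/* Hypothetical shape of the code built above for a linear integer
   argument K with constant step 1, clone simdlen 4 and ncopies 2
   (vf == 8).  In the loop header:

     k_phi_1 = PHI <k_start_2 (preheader), k_next_3 (latch)>
     k_next_3 = k_phi_1 + 8;    <- step * ncopies * nunits

   copy j == 0 passes k_phi_1 and copy j == 1 passes

     _4 = k_phi_1 + 4;          <- step * j * nunits  */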
4407 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4408 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4409 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4410 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4411 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4412 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4413 default:
4414 gcc_unreachable ();
4418 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4419 if (vec_dest)
4421 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4422 if (ratype)
4423 new_temp = create_tmp_var (ratype);
4424 else if (simd_clone_subparts (vectype)
4425 == simd_clone_subparts (rtype))
4426 new_temp = make_ssa_name (vec_dest, new_call);
4427 else
4428 new_temp = make_ssa_name (rtype, new_call);
4429 gimple_call_set_lhs (new_call, new_temp);
4431 stmt_vec_info new_stmt_info
4432 = vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4434 if (vec_dest)
4436 if (simd_clone_subparts (vectype) < nunits)
4438 unsigned int k, l;
4439 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4440 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4441 k = nunits / simd_clone_subparts (vectype);
4442 gcc_assert ((k & (k - 1)) == 0);
4443 for (l = 0; l < k; l++)
4445 tree t;
4446 if (ratype)
4448 t = build_fold_addr_expr (new_temp);
4449 t = build2 (MEM_REF, vectype, t,
4450 build_int_cst (TREE_TYPE (t), l * bytes));
4452 else
4453 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4454 bitsize_int (prec), bitsize_int (l * prec));
4455 gimple *new_stmt
4456 = gimple_build_assign (make_ssa_name (vectype), t);
4457 new_stmt_info
4458 = vect_finish_stmt_generation (vinfo, stmt_info,
4459 new_stmt, gsi);
4461 if (j == 0 && l == 0)
4462 STMT_VINFO_VEC_STMT (stmt_info)
4463 = *vec_stmt = new_stmt_info;
4464 else
4465 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4467 prev_stmt_info = new_stmt_info;
4470 if (ratype)
4471 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4472 continue;
4474 else if (simd_clone_subparts (vectype) > nunits)
4476 unsigned int k = (simd_clone_subparts (vectype)
4477 / simd_clone_subparts (rtype));
4478 gcc_assert ((k & (k - 1)) == 0);
4479 if ((j & (k - 1)) == 0)
4480 vec_alloc (ret_ctor_elts, k);
4481 if (ratype)
4483 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4484 for (m = 0; m < o; m++)
4486 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4487 size_int (m), NULL_TREE, NULL_TREE);
4488 gimple *new_stmt
4489 = gimple_build_assign (make_ssa_name (rtype), tem);
4490 new_stmt_info
4491 = vect_finish_stmt_generation (vinfo, stmt_info,
4492 new_stmt, gsi);
4493 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4494 gimple_assign_lhs (new_stmt));
4496 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4498 else
4499 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4500 if ((j & (k - 1)) != k - 1)
4501 continue;
4502 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4503 gimple *new_stmt
4504 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4505 new_stmt_info
4506 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4508 if ((unsigned) j == k - 1)
4509 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4510 else
4511 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4513 prev_stmt_info = new_stmt_info;
4514 continue;
4516 else if (ratype)
4518 tree t = build_fold_addr_expr (new_temp);
4519 t = build2 (MEM_REF, vectype, t,
4520 build_int_cst (TREE_TYPE (t), 0));
4521 gimple *new_stmt
4522 = gimple_build_assign (make_ssa_name (vec_dest), t);
4523 new_stmt_info
4524 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4525 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4529 if (j == 0)
4530 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4531 else
4532 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4534 prev_stmt_info = new_stmt_info;
4537 vargs.release ();
4539 /* The call in STMT might prevent it from being removed in dce.
4540 We however cannot remove it here, due to the way the ssa name
4541 it defines is mapped to the new definition. So just replace
4542 rhs of the statement with something harmless. */
4544 if (slp_node)
4545 return true;
4547 gimple *new_stmt;
4548 if (scalar_dest)
4550 type = TREE_TYPE (scalar_dest);
4551 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4552 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4554 else
4555 new_stmt = gimple_build_nop ();
4556 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4557 unlink_stmt_vdef (stmt);
4559 return true;
4563 /* Function vect_gen_widened_results_half
4565 Create a vector stmt whose code, number of arguments, and result
4566 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4567 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4570 STMT_INFO is the original scalar stmt that we are vectorizing. */
4572 static gimple *
4573 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4574 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4575 tree vec_dest, gimple_stmt_iterator *gsi,
4576 stmt_vec_info stmt_info)
4578 gimple *new_stmt;
4579 tree new_temp;
4581 /* Generate half of the widened result: */
4582 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4583 if (op_type != binary_op)
4584 vec_oprnd1 = NULL;
4585 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4586 new_temp = make_ssa_name (vec_dest, new_stmt);
4587 gimple_assign_set_lhs (new_stmt, new_temp);
4588 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4590 return new_stmt;
4594 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4595 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4596 containing the scalar operand), and for the rest we get a copy with
4597 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4598 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4599 The vectors are collected into VEC_OPRNDS. */
4601 static void
4602 vect_get_loop_based_defs (vec_info *vinfo, tree *oprnd, stmt_vec_info stmt_info,
4603 vec<tree> *vec_oprnds, int multi_step_cvt)
4605 tree vec_oprnd;
4607 /* Get first vector operand. */
4608 /* All the vector operands except the very first one (which is the scalar operand)
4609 are stmt copies. */
4610 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4611 vec_oprnd = vect_get_vec_def_for_operand (vinfo, *oprnd, stmt_info);
4612 else
4613 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4615 vec_oprnds->quick_push (vec_oprnd);
4617 /* Get second vector operand. */
4618 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4619 vec_oprnds->quick_push (vec_oprnd);
4621 *oprnd = vec_oprnd;
4623 /* For conversion in multiple steps, continue to get operands
4624 recursively. */
4625 if (multi_step_cvt)
4626 vect_get_loop_based_defs (vinfo, oprnd, stmt_info, vec_oprnds,
4627 multi_step_cvt - 1);
4631 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4632 For multi-step conversions store the resulting vectors and call the function
4633 recursively. */
4635 static void
4636 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4637 int multi_step_cvt,
4638 stmt_vec_info stmt_info,
4639 vec<tree> vec_dsts,
4640 gimple_stmt_iterator *gsi,
4641 slp_tree slp_node, enum tree_code code,
4642 stmt_vec_info *prev_stmt_info)
4644 unsigned int i;
4645 tree vop0, vop1, new_tmp, vec_dest;
4647 vec_dest = vec_dsts.pop ();
4649 for (i = 0; i < vec_oprnds->length (); i += 2)
4651 /* Create demotion operation. */
4652 vop0 = (*vec_oprnds)[i];
4653 vop1 = (*vec_oprnds)[i + 1];
4654 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4655 new_tmp = make_ssa_name (vec_dest, new_stmt);
4656 gimple_assign_set_lhs (new_stmt, new_tmp);
4657 stmt_vec_info new_stmt_info
4658 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4660 if (multi_step_cvt)
4661 /* Store the resulting vector for next recursive call. */
4662 (*vec_oprnds)[i/2] = new_tmp;
4663 else
4665 /* This is the last step of the conversion sequence. Store the
4666 vectors in SLP_NODE or in the vector info of the scalar statement
4667 (or in STMT_VINFO_RELATED_STMT chain). */
4668 if (slp_node)
4669 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4670 else
4672 if (!*prev_stmt_info)
4673 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4674 else
4675 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4677 *prev_stmt_info = new_stmt_info;
4682 /* For multi-step demotion operations we first generate demotion operations
4683 from the source type to the intermediate types, and then combine the
4684 results (stored in VEC_OPRNDS) in demotion operation to the destination
4685 type. */
4686 if (multi_step_cvt)
4688 /* At each level of recursion we have half of the operands we had at the
4689 previous level. */
4690 vec_oprnds->truncate ((i+1)/2);
4691 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4692 multi_step_cvt - 1,
4693 stmt_info, vec_dsts, gsi,
4694 slp_node, VEC_PACK_TRUNC_EXPR,
4695 prev_stmt_info);
4698 vec_dsts.quick_push (vec_dest);
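/* As a hypothetical illustration of the recursion above, demoting int to
   char with 128-bit vectors (multi_step_cvt == 1, intermediate type V8HI)
   turns four V4SI operands into one V16QI result:

     i1 = VEC_PACK_TRUNC_EXPR <s1, s2>;
     i2 = VEC_PACK_TRUNC_EXPR <s3, s4>;
     r  = VEC_PACK_TRUNC_EXPR <i1, i2>;

   the first level uses the caller-supplied CODE and stores I1 and I2 back
   into VEC_OPRNDS for the recursive call.  */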
4702 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4703 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4704 STMT_INFO. For multi-step conversions store the resulting vectors and
4705 call the function recursively. */
4707 static void
4708 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4709 vec<tree> *vec_oprnds0,
4710 vec<tree> *vec_oprnds1,
4711 stmt_vec_info stmt_info, tree vec_dest,
4712 gimple_stmt_iterator *gsi,
4713 enum tree_code code1,
4714 enum tree_code code2, int op_type)
4716 int i;
4717 tree vop0, vop1, new_tmp1, new_tmp2;
4718 gimple *new_stmt1, *new_stmt2;
4719 vec<tree> vec_tmp = vNULL;
4721 vec_tmp.create (vec_oprnds0->length () * 2);
4722 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4724 if (op_type == binary_op)
4725 vop1 = (*vec_oprnds1)[i];
4726 else
4727 vop1 = NULL_TREE;
4729 /* Generate the two halves of promotion operation. */
4730 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4731 op_type, vec_dest, gsi,
4732 stmt_info);
4733 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4734 op_type, vec_dest, gsi,
4735 stmt_info);
4736 if (is_gimple_call (new_stmt1))
4738 new_tmp1 = gimple_call_lhs (new_stmt1);
4739 new_tmp2 = gimple_call_lhs (new_stmt2);
4741 else
4743 new_tmp1 = gimple_assign_lhs (new_stmt1);
4744 new_tmp2 = gimple_assign_lhs (new_stmt2);
4747 /* Store the results for the next step. */
4748 vec_tmp.quick_push (new_tmp1);
4749 vec_tmp.quick_push (new_tmp2);
4752 vec_oprnds0->release ();
4753 *vec_oprnds0 = vec_tmp;
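/* Hypothetical illustration: widening one V8HI operand to int yields two
   V4SI results per input vector,

     lo = [vec_unpack_lo_expr] vect_s_1;
     hi = [vec_unpack_hi_expr] vect_s_1;

   so VEC_OPRNDS0 doubles in length on each invocation, ready for a
   possible further promotion step.  */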
4757 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4758 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4759 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4760 Return true if STMT_INFO is vectorizable in this way. */
4762 static bool
4763 vectorizable_conversion (vec_info *vinfo,
4764 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4765 stmt_vec_info *vec_stmt, slp_tree slp_node,
4766 stmt_vector_for_cost *cost_vec)
4768 tree vec_dest;
4769 tree scalar_dest;
4770 tree op0, op1 = NULL_TREE;
4771 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4772 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4773 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4774 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4775 tree new_temp;
4776 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4777 int ndts = 2;
4778 stmt_vec_info prev_stmt_info;
4779 poly_uint64 nunits_in;
4780 poly_uint64 nunits_out;
4781 tree vectype_out, vectype_in;
4782 int ncopies, i, j;
4783 tree lhs_type, rhs_type;
4784 enum { NARROW, NONE, WIDEN } modifier;
4785 vec<tree> vec_oprnds0 = vNULL;
4786 vec<tree> vec_oprnds1 = vNULL;
4787 tree vop0;
4788 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4789 int multi_step_cvt = 0;
4790 vec<tree> interm_types = vNULL;
4791 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4792 int op_type;
4793 unsigned short fltsz;
4795 /* Is STMT a vectorizable conversion? */
4797 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4798 return false;
4800 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4801 && ! vec_stmt)
4802 return false;
4804 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4805 if (!stmt)
4806 return false;
4808 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4809 return false;
4811 code = gimple_assign_rhs_code (stmt);
4812 if (!CONVERT_EXPR_CODE_P (code)
4813 && code != FIX_TRUNC_EXPR
4814 && code != FLOAT_EXPR
4815 && code != WIDEN_MULT_EXPR
4816 && code != WIDEN_LSHIFT_EXPR)
4817 return false;
4819 op_type = TREE_CODE_LENGTH (code);
4821 /* Check types of lhs and rhs. */
4822 scalar_dest = gimple_assign_lhs (stmt);
4823 lhs_type = TREE_TYPE (scalar_dest);
4824 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4826 op0 = gimple_assign_rhs1 (stmt);
4827 rhs_type = TREE_TYPE (op0);
4829 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4830 && !((INTEGRAL_TYPE_P (lhs_type)
4831 && INTEGRAL_TYPE_P (rhs_type))
4832 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4833 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4834 return false;
4836 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4837 && ((INTEGRAL_TYPE_P (lhs_type)
4838 && !type_has_mode_precision_p (lhs_type))
4839 || (INTEGRAL_TYPE_P (rhs_type)
4840 && !type_has_mode_precision_p (rhs_type))))
4842 if (dump_enabled_p ())
4843 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4844 "type conversion to/from bit-precision unsupported."
4845 "\n");
4846 return false;
4849 /* Check the operands of the operation. */
4850 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4852 if (dump_enabled_p ())
4853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4854 "use not simple.\n");
4855 return false;
4857 if (op_type == binary_op)
4859 bool ok;
4861 op1 = gimple_assign_rhs2 (stmt);
4862 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4863 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4864 OP1. */
4865 if (CONSTANT_CLASS_P (op0))
4866 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4867 else
4868 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4870 if (!ok)
4872 if (dump_enabled_p ())
4873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4874 "use not simple.\n");
4875 return false;
4879 /* If op0 is an external or constant def, infer the vector type
4880 from the scalar type. */
4881 if (!vectype_in)
4882 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4883 if (vec_stmt)
4884 gcc_assert (vectype_in);
4885 if (!vectype_in)
4887 if (dump_enabled_p ())
4888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4889 "no vectype for scalar type %T\n", rhs_type);
4891 return false;
4894 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4895 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4897 if (dump_enabled_p ())
4898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4899 "can't convert between boolean and non "
4900 "boolean vectors %T\n", rhs_type);
4902 return false;
4905 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4906 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4907 if (known_eq (nunits_out, nunits_in))
4908 modifier = NONE;
4909 else if (multiple_p (nunits_out, nunits_in))
4910 modifier = NARROW;
4911 else
4913 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4914 modifier = WIDEN;
4917 /* Multiple types in SLP are handled by creating the appropriate number of
4918 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4919 case of SLP. */
4920 if (slp_node)
4921 ncopies = 1;
4922 else if (modifier == NARROW)
4923 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4924 else
4925 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4927 /* Sanity check: make sure that at least one copy of the vectorized stmt
4928 needs to be generated. */
4929 gcc_assert (ncopies >= 1);
4931 bool found_mode = false;
4932 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4933 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4934 opt_scalar_mode rhs_mode_iter;
4936 /* Supportable by target? */
4937 switch (modifier)
4939 case NONE:
4940 if (code != FIX_TRUNC_EXPR
4941 && code != FLOAT_EXPR
4942 && !CONVERT_EXPR_CODE_P (code))
4943 return false;
4944 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4945 break;
4946 /* FALLTHRU */
4947 unsupported:
4948 if (dump_enabled_p ())
4949 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4950 "conversion not supported by target.\n");
4951 return false;
4953 case WIDEN:
4954 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4955 vectype_in, &code1, &code2,
4956 &multi_step_cvt, &interm_types))
4958 /* Binary widening operation can only be supported directly by the
4959 architecture. */
4960 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4961 break;
4964 if (code != FLOAT_EXPR
4965 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4966 goto unsupported;
4968 fltsz = GET_MODE_SIZE (lhs_mode);
4969 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4971 rhs_mode = rhs_mode_iter.require ();
4972 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4973 break;
4975 cvt_type
4976 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4977 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4978 if (cvt_type == NULL_TREE)
4979 goto unsupported;
4981 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4983 if (!supportable_convert_operation (code, vectype_out,
4984 cvt_type, &codecvt1))
4985 goto unsupported;
4987 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4988 vectype_out, cvt_type,
4989 &codecvt1, &codecvt2,
4990 &multi_step_cvt,
4991 &interm_types))
4992 continue;
4993 else
4994 gcc_assert (multi_step_cvt == 0);
4996 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4997 cvt_type,
4998 vectype_in, &code1, &code2,
4999 &multi_step_cvt, &interm_types))
5001 found_mode = true;
5002 break;
5006 if (!found_mode)
5007 goto unsupported;
5009 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5010 codecvt2 = ERROR_MARK;
5011 else
5013 multi_step_cvt++;
5014 interm_types.safe_push (cvt_type);
5015 cvt_type = NULL_TREE;
5017 break;
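/* A hypothetical outcome of the search above for int -> double
   (SImode -> DFmode) with 128-bit vectors: RHS_MODE settles on DImode,
   CVT_TYPE becomes a V2DI type, and the conversion is emitted as a
   widening NOP step followed by an integer-to-float step,

     tmp_lo = [vec_unpack_lo_expr] vect_i_1;   (V4SI -> V2DI)
     vect_d_1 = (vector(2) double) tmp_lo;     (V2DI -> V2DF)

   provided the target supports both pieces; otherwise the loop keeps
   looking for a wider RHS_MODE or gives up.  */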
5019 case NARROW:
5020 gcc_assert (op_type == unary_op);
5021 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5022 &code1, &multi_step_cvt,
5023 &interm_types))
5024 break;
5026 if (code != FIX_TRUNC_EXPR
5027 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5028 goto unsupported;
5030 cvt_type
5031 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5032 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5033 if (cvt_type == NULL_TREE)
5034 goto unsupported;
5035 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5036 &codecvt1))
5037 goto unsupported;
5038 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5039 &code1, &multi_step_cvt,
5040 &interm_types))
5041 break;
5042 goto unsupported;
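/* A hypothetical outcome for double -> short (DFmode -> HImode):
   CVT_TYPE becomes a V2DI type, CODECVT1 the DF -> DI fix_trunc, and the
   NOP narrowing then packs the integers down to V8HI in two steps:

     t1 = (vector(2) long int) vect_d_1;       (V2DF -> V2DI, codecvt1)
     i1 = VEC_PACK_TRUNC_EXPR <t1, t2>;        (V2DI -> V4SI)
     r  = VEC_PACK_TRUNC_EXPR <i1, i2>;        (V4SI -> V8HI)

   with t2 and i2 formed the same way, assuming the target provides each
   individual step.  */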
5044 default:
5045 gcc_unreachable ();
5048 if (!vec_stmt) /* transformation not required. */
5050 DUMP_VECT_SCOPE ("vectorizable_conversion");
5051 if (modifier == NONE)
5053 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5054 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5055 cost_vec);
5057 else if (modifier == NARROW)
5059 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5060 /* The final packing step produces one vector result per copy. */
5061 unsigned int nvectors
5062 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5063 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5064 multi_step_cvt, cost_vec);
5066 else
5068 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5069 /* The initial unpacking step produces two vector results
5070 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5071 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5072 unsigned int nvectors
5073 = (slp_node
5074 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5075 : ncopies * 2);
5076 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5077 multi_step_cvt, cost_vec);
5079 interm_types.release ();
5080 return true;
5083 /* Transform. */
5084 if (dump_enabled_p ())
5085 dump_printf_loc (MSG_NOTE, vect_location,
5086 "transform conversion. ncopies = %d.\n", ncopies);
5088 if (op_type == binary_op)
5090 if (CONSTANT_CLASS_P (op0))
5091 op0 = fold_convert (TREE_TYPE (op1), op0);
5092 else if (CONSTANT_CLASS_P (op1))
5093 op1 = fold_convert (TREE_TYPE (op0), op1);
5096 /* In case of multi-step conversion, we first generate conversion operations
5097 to the intermediate types, and then from those types to the final one.
5098 We create vector destinations for the intermediate types (TYPES) received
5099 from supportable_*_operation, and store them in the correct order
5100 for future use in vect_create_vectorized_*_stmts (). */
5101 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5102 vec_dest = vect_create_destination_var (scalar_dest,
5103 (cvt_type && modifier == WIDEN)
5104 ? cvt_type : vectype_out);
5105 vec_dsts.quick_push (vec_dest);
5107 if (multi_step_cvt)
5109 for (i = interm_types.length () - 1;
5110 interm_types.iterate (i, &intermediate_type); i--)
5112 vec_dest = vect_create_destination_var (scalar_dest,
5113 intermediate_type);
5114 vec_dsts.quick_push (vec_dest);
5118 if (cvt_type)
5119 vec_dest = vect_create_destination_var (scalar_dest,
5120 modifier == WIDEN
5121 ? vectype_out : cvt_type);
5123 if (!slp_node)
5125 if (modifier == WIDEN)
5127 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5128 if (op_type == binary_op)
5129 vec_oprnds1.create (1);
5131 else if (modifier == NARROW)
5132 vec_oprnds0.create (
5133 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5135 else if (code == WIDEN_LSHIFT_EXPR)
5136 vec_oprnds1.create (slp_node->vec_stmts_size);
5138 last_oprnd = op0;
5139 prev_stmt_info = NULL;
5140 switch (modifier)
5142 case NONE:
5143 for (j = 0; j < ncopies; j++)
5145 if (j == 0)
5146 vect_get_vec_defs (vinfo, op0, NULL, stmt_info, &vec_oprnds0,
5147 NULL, slp_node);
5148 else
5149 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5151 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5153 stmt_vec_info new_stmt_info;
5154 /* Arguments are ready, create the new vector stmt. */
5155 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5156 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5157 new_temp = make_ssa_name (vec_dest, new_stmt);
5158 gimple_assign_set_lhs (new_stmt, new_temp);
5159 new_stmt_info
5160 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5162 if (slp_node)
5163 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5164 else
5166 if (!prev_stmt_info)
5167 STMT_VINFO_VEC_STMT (stmt_info)
5168 = *vec_stmt = new_stmt_info;
5169 else
5170 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5171 prev_stmt_info = new_stmt_info;
5175 break;
5177 case WIDEN:
5178 /* In case the vectorization factor (VF) is bigger than the number
5179 of elements that we can fit in a vectype (nunits), we have to
5180 generate more than one vector stmt, i.e., we need to "unroll"
5181 the vector stmt by a factor VF/nunits. */
5182 for (j = 0; j < ncopies; j++)
5184 /* Handle uses. */
5185 if (j == 0)
5187 if (slp_node)
5189 if (code == WIDEN_LSHIFT_EXPR)
5191 unsigned int k;
5193 vec_oprnd1 = op1;
5194 /* Store vec_oprnd1 for every vector stmt to be created
5195 for SLP_NODE. We check during the analysis that all
5196 the shift arguments are the same. */
5197 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5198 vec_oprnds1.quick_push (vec_oprnd1);
5200 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info,
5201 &vec_oprnds0, NULL, slp_node);
5203 else
5204 vect_get_vec_defs (vinfo, op0, op1, stmt_info, &vec_oprnds0,
5205 &vec_oprnds1, slp_node);
5207 else
5209 vec_oprnd0 = vect_get_vec_def_for_operand (vinfo,
5210 op0, stmt_info);
5211 vec_oprnds0.quick_push (vec_oprnd0);
5212 if (op_type == binary_op)
5214 if (code == WIDEN_LSHIFT_EXPR)
5215 vec_oprnd1 = op1;
5216 else
5217 vec_oprnd1
5218 = vect_get_vec_def_for_operand (vinfo,
5219 op1, stmt_info);
5220 vec_oprnds1.quick_push (vec_oprnd1);
5224 else
5226 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5227 vec_oprnds0.truncate (0);
5228 vec_oprnds0.quick_push (vec_oprnd0);
5229 if (op_type == binary_op)
5231 if (code == WIDEN_LSHIFT_EXPR)
5232 vec_oprnd1 = op1;
5233 else
5234 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5235 vec_oprnd1);
5236 vec_oprnds1.truncate (0);
5237 vec_oprnds1.quick_push (vec_oprnd1);
5241 /* Arguments are ready. Create the new vector stmts. */
5242 for (i = multi_step_cvt; i >= 0; i--)
5244 tree this_dest = vec_dsts[i];
5245 enum tree_code c1 = code1, c2 = code2;
5246 if (i == 0 && codecvt2 != ERROR_MARK)
5248 c1 = codecvt1;
5249 c2 = codecvt2;
5251 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5252 &vec_oprnds1, stmt_info,
5253 this_dest, gsi,
5254 c1, c2, op_type);
5257 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5259 stmt_vec_info new_stmt_info;
5260 if (cvt_type)
5262 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5263 new_temp = make_ssa_name (vec_dest);
5264 gassign *new_stmt
5265 = gimple_build_assign (new_temp, codecvt1, vop0);
5266 new_stmt_info
5267 = vect_finish_stmt_generation (vinfo, stmt_info,
5268 new_stmt, gsi);
5270 else
5271 new_stmt_info = vinfo->lookup_def (vop0);
5273 if (slp_node)
5274 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5275 else
5277 if (!prev_stmt_info)
5278 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5279 else
5280 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5281 prev_stmt_info = new_stmt_info;
5286 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5287 break;
5289 case NARROW:
5290 /* In case the vectorization factor (VF) is bigger than the number
5291 of elements that we can fit in a vectype (nunits), we have to
5292 generate more than one vector stmt, i.e., we need to "unroll"
5293 the vector stmt by a factor VF/nunits. */
5294 for (j = 0; j < ncopies; j++)
5296 /* Handle uses. */
5297 if (slp_node)
5298 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info, &vec_oprnds0,
5299 NULL, slp_node);
5300 else
5302 vec_oprnds0.truncate (0);
5303 vect_get_loop_based_defs (vinfo,
5304 &last_oprnd, stmt_info, &vec_oprnds0,
5305 vect_pow2 (multi_step_cvt) - 1);
5308 /* Arguments are ready. Create the new vector stmts. */
5309 if (cvt_type)
5310 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5312 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5313 new_temp = make_ssa_name (vec_dest);
5314 gassign *new_stmt
5315 = gimple_build_assign (new_temp, codecvt1, vop0);
5316 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5317 vec_oprnds0[i] = new_temp;
5320 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5321 multi_step_cvt,
5322 stmt_info, vec_dsts, gsi,
5323 slp_node, code1,
5324 &prev_stmt_info);
5327 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5328 break;
5331 vec_oprnds0.release ();
5332 vec_oprnds1.release ();
5333 interm_types.release ();
5335 return true;
5338 /* Return true if we can assume from the scalar form of STMT_INFO that
5339 neither the scalar nor the vector forms will generate code. STMT_INFO
5340 is known not to involve a data reference. */
5342 bool
5343 vect_nop_conversion_p (stmt_vec_info stmt_info)
5345 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5346 if (!stmt)
5347 return false;
5349 tree lhs = gimple_assign_lhs (stmt);
5350 tree_code code = gimple_assign_rhs_code (stmt);
5351 tree rhs = gimple_assign_rhs1 (stmt);
5353 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5354 return true;
5356 if (CONVERT_EXPR_CODE_P (code))
5357 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5359 return false;
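/* Illustrative example (an editor's sketch, not taken from the sources):
   a cast such as

     unsigned int u = ...;
     int i = (int) u;

   only relabels the same bit pattern, so tree_nop_conversion_p holds and
   neither the scalar nor the vector form of the statement emits any
   instruction; vectorizable_assignment below uses this predicate to skip
   costing such statements.  */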
5362 /* Function vectorizable_assignment.
5364 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5365 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5366 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5367 Return true if STMT_INFO is vectorizable in this way. */
5369 static bool
5370 vectorizable_assignment (vec_info *vinfo,
5371 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5372 stmt_vec_info *vec_stmt, slp_tree slp_node,
5373 stmt_vector_for_cost *cost_vec)
5375 tree vec_dest;
5376 tree scalar_dest;
5377 tree op;
5378 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5379 tree new_temp;
5380 enum vect_def_type dt[1] = {vect_unknown_def_type};
5381 int ndts = 1;
5382 int ncopies;
5383 int i, j;
5384 vec<tree> vec_oprnds = vNULL;
5385 tree vop;
5386 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5387 stmt_vec_info prev_stmt_info = NULL;
5388 enum tree_code code;
5389 tree vectype_in;
5391 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5392 return false;
5394 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5395 && ! vec_stmt)
5396 return false;
5398 /* Is vectorizable assignment? */
5399 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5400 if (!stmt)
5401 return false;
5403 scalar_dest = gimple_assign_lhs (stmt);
5404 if (TREE_CODE (scalar_dest) != SSA_NAME)
5405 return false;
5407 code = gimple_assign_rhs_code (stmt);
5408 if (gimple_assign_single_p (stmt)
5409 || code == PAREN_EXPR
5410 || CONVERT_EXPR_CODE_P (code))
5411 op = gimple_assign_rhs1 (stmt);
5412 else
5413 return false;
5415 if (code == VIEW_CONVERT_EXPR)
5416 op = TREE_OPERAND (op, 0);
5418 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5419 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5421 /* Multiple types in SLP are handled by creating the appropriate number of
5422 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5423 case of SLP. */
5424 if (slp_node)
5425 ncopies = 1;
5426 else
5427 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5429 gcc_assert (ncopies >= 1);
5431 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5433 if (dump_enabled_p ())
5434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5435 "use not simple.\n");
5436 return false;
5439 /* We can handle NOP_EXPR conversions that do not change the number
5440 of elements or the vector size. */
5441 if ((CONVERT_EXPR_CODE_P (code)
5442 || code == VIEW_CONVERT_EXPR)
5443 && (!vectype_in
5444 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5445 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5446 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5447 return false;
5449 /* We do not handle bit-precision changes. */
5450 if ((CONVERT_EXPR_CODE_P (code)
5451 || code == VIEW_CONVERT_EXPR)
5452 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5453 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5454 || !type_has_mode_precision_p (TREE_TYPE (op)))
5455 /* But a conversion that does not change the bit-pattern is ok. */
5456 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5457 > TYPE_PRECISION (TREE_TYPE (op)))
5458 && TYPE_UNSIGNED (TREE_TYPE (op)))
5459 /* Conversion between boolean types of different sizes is
5460 a simple assignment in case their vectypes are same
5461 boolean vectors. */
5462 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5463 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5465 if (dump_enabled_p ())
5466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5467 "type conversion to/from bit-precision "
5468 "unsupported.\n");
5469 return false;
5472 if (!vec_stmt) /* transformation not required. */
5474 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5475 DUMP_VECT_SCOPE ("vectorizable_assignment");
5476 if (!vect_nop_conversion_p (stmt_info))
5477 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5478 cost_vec);
5479 return true;
5482 /* Transform. */
5483 if (dump_enabled_p ())
5484 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5486 /* Handle def. */
5487 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5489 /* Handle use. */
5490 for (j = 0; j < ncopies; j++)
5492 /* Handle uses. */
5493 if (j == 0)
5494 vect_get_vec_defs (vinfo, op, NULL, stmt_info, &vec_oprnds, NULL,
5495 slp_node);
5496 else
5497 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5499 /* Arguments are ready. Create the new vector stmt. */
5500 stmt_vec_info new_stmt_info = NULL;
5501 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5503 if (CONVERT_EXPR_CODE_P (code)
5504 || code == VIEW_CONVERT_EXPR)
5505 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5506 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5507 new_temp = make_ssa_name (vec_dest, new_stmt);
5508 gimple_assign_set_lhs (new_stmt, new_temp);
5509 new_stmt_info
5510 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5511 if (slp_node)
5512 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5515 if (slp_node)
5516 continue;
5518 if (j == 0)
5519 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5520 else
5521 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5523 prev_stmt_info = new_stmt_info;
5526 vec_oprnds.release ();
5527 return true;
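/* Illustrative example (an editor's sketch, not from the sources): with

     int *a;
     unsigned int *b;
     ...
     a[i] = (int) b[i];

   the checks above pass (same number of vector elements, same vector mode
   size, no bit-precision change), and each vector copy is emitted as a
   plain assignment whose rhs is wrapped in a VIEW_CONVERT_EXPR to the
   destination vector type.  */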
5531 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5532 either as shift by a scalar or by a vector. */
5534 bool
5535 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5538 machine_mode vec_mode;
5539 optab optab;
5540 int icode;
5541 tree vectype;
5543 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5544 if (!vectype)
5545 return false;
5547 optab = optab_for_tree_code (code, vectype, optab_scalar);
5548 if (!optab
5549 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5551 optab = optab_for_tree_code (code, vectype, optab_vector);
5552 if (!optab
5553 || (optab_handler (optab, TYPE_MODE (vectype))
5554 == CODE_FOR_nothing))
5555 return false;
5558 vec_mode = TYPE_MODE (vectype);
5559 icode = (int) optab_handler (optab, vec_mode);
5560 if (icode == CODE_FOR_nothing)
5561 return false;
5563 return true;
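/* Usage sketch (the call site below is hypothetical; only the signature
   above is taken from this file): a caller that wants to introduce a
   shift, e.g. during pattern recognition, can first ask

     if (!vect_supportable_shift (vinfo, LSHIFT_EXPR, TREE_TYPE (oprnd0)))
       return NULL;

   which succeeds if either the vector-shift-by-scalar or the
   vector-shift-by-vector optab has a handler for the chosen vector
   mode.  */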
5567 /* Function vectorizable_shift.
5569 Check if STMT_INFO performs a shift operation that can be vectorized.
5570 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5571 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5572 Return true if STMT_INFO is vectorizable in this way. */
5574 static bool
5575 vectorizable_shift (vec_info *vinfo,
5576 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5577 stmt_vec_info *vec_stmt, slp_tree slp_node,
5578 stmt_vector_for_cost *cost_vec)
5580 tree vec_dest;
5581 tree scalar_dest;
5582 tree op0, op1 = NULL;
5583 tree vec_oprnd1 = NULL_TREE;
5584 tree vectype;
5585 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5586 enum tree_code code;
5587 machine_mode vec_mode;
5588 tree new_temp;
5589 optab optab;
5590 int icode;
5591 machine_mode optab_op2_mode;
5592 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5593 int ndts = 2;
5594 stmt_vec_info prev_stmt_info;
5595 poly_uint64 nunits_in;
5596 poly_uint64 nunits_out;
5597 tree vectype_out;
5598 tree op1_vectype;
5599 int ncopies;
5600 int j, i;
5601 vec<tree> vec_oprnds0 = vNULL;
5602 vec<tree> vec_oprnds1 = vNULL;
5603 tree vop0, vop1;
5604 unsigned int k;
5605 bool scalar_shift_arg = true;
5606 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5607 bool incompatible_op1_vectype_p = false;
5609 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5610 return false;
5612 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5613 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5614 && ! vec_stmt)
5615 return false;
5617 /* Is STMT a vectorizable binary/unary operation? */
5618 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5619 if (!stmt)
5620 return false;
5622 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5623 return false;
5625 code = gimple_assign_rhs_code (stmt);
5627 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5628 || code == RROTATE_EXPR))
5629 return false;
5631 scalar_dest = gimple_assign_lhs (stmt);
5632 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5633 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5635 if (dump_enabled_p ())
5636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5637 "bit-precision shifts not supported.\n");
5638 return false;
5641 op0 = gimple_assign_rhs1 (stmt);
5642 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5644 if (dump_enabled_p ())
5645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5646 "use not simple.\n");
5647 return false;
5649 /* If op0 is an external or constant def, infer the vector type
5650 from the scalar type. */
5651 if (!vectype)
5652 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5653 if (vec_stmt)
5654 gcc_assert (vectype);
5655 if (!vectype)
5657 if (dump_enabled_p ())
5658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5659 "no vectype for scalar type\n");
5660 return false;
5663 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5664 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5665 if (maybe_ne (nunits_out, nunits_in))
5666 return false;
5668 op1 = gimple_assign_rhs2 (stmt);
5669 stmt_vec_info op1_def_stmt_info;
5670 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5671 &op1_def_stmt_info))
5673 if (dump_enabled_p ())
5674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5675 "use not simple.\n");
5676 return false;
5679 /* Multiple types in SLP are handled by creating the appropriate number of
5680 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5681 case of SLP. */
5682 if (slp_node)
5683 ncopies = 1;
5684 else
5685 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5687 gcc_assert (ncopies >= 1);
5689 /* Determine whether the shift amount is a vector or a scalar. If the
5690 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5692 if ((dt[1] == vect_internal_def
5693 || dt[1] == vect_induction_def
5694 || dt[1] == vect_nested_cycle)
5695 && !slp_node)
5696 scalar_shift_arg = false;
5697 else if (dt[1] == vect_constant_def
5698 || dt[1] == vect_external_def
5699 || dt[1] == vect_internal_def)
5701 /* In SLP, we need to check whether the shift count is the same
5702 for all statements; in loops, if it is a constant or invariant,
5703 it is always a scalar shift. */
5704 if (slp_node)
5706 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5707 stmt_vec_info slpstmt_info;
5709 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5711 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5712 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5713 scalar_shift_arg = false;
5716 /* For internal SLP defs we have to make sure we see scalar stmts
5717 for all vector elements.
5718 ??? For different vectors we could resort to a different
5719 scalar shift operand but code-generation below simply always
5720 takes the first. */
5721 if (dt[1] == vect_internal_def
5722 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5723 stmts.length ()))
5724 scalar_shift_arg = false;
5727 /* If the shift amount is computed by a pattern stmt we cannot
5728 use the scalar amount directly thus give up and use a vector
5729 shift. */
5730 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5731 scalar_shift_arg = false;
5733 else
5735 if (dump_enabled_p ())
5736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5737 "operand mode requires invariant argument.\n");
5738 return false;
5741 /* Vector shifted by vector. */
5742 bool was_scalar_shift_arg = scalar_shift_arg;
5743 if (!scalar_shift_arg)
5745 optab = optab_for_tree_code (code, vectype, optab_vector);
5746 if (dump_enabled_p ())
5747 dump_printf_loc (MSG_NOTE, vect_location,
5748 "vector/vector shift/rotate found.\n");
5750 if (!op1_vectype)
5751 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5752 slp_node);
5753 incompatible_op1_vectype_p
5754 = (op1_vectype == NULL_TREE
5755 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5756 TYPE_VECTOR_SUBPARTS (vectype))
5757 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5758 if (incompatible_op1_vectype_p
5759 && (!slp_node
5760 || SLP_TREE_DEF_TYPE
5761 (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
5763 if (dump_enabled_p ())
5764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5765 "unusable type for last operand in"
5766 " vector/vector shift/rotate.\n");
5767 return false;
5770 /* See if the machine has a vector shifted by scalar insn and if not
5771 then see if it has a vector shifted by vector insn. */
5772 else
5774 optab = optab_for_tree_code (code, vectype, optab_scalar);
5775 if (optab
5776 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5778 if (dump_enabled_p ())
5779 dump_printf_loc (MSG_NOTE, vect_location,
5780 "vector/scalar shift/rotate found.\n");
5782 else
5784 optab = optab_for_tree_code (code, vectype, optab_vector);
5785 if (optab
5786 && (optab_handler (optab, TYPE_MODE (vectype))
5787 != CODE_FOR_nothing))
5789 scalar_shift_arg = false;
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_NOTE, vect_location,
5793 "vector/vector shift/rotate found.\n");
5795 /* Unlike the other binary operators, shifts/rotates have
5796 the rhs being int, instead of the same type as the lhs,
5797 so make sure the scalar is the right type if we are
5798 dealing with vectors of long long/long/short/char. */
5799 incompatible_op1_vectype_p
5800 = !tree_nop_conversion_p (TREE_TYPE (vectype),
5801 TREE_TYPE (op1));
5806 /* Supportable by target? */
5807 if (!optab)
5809 if (dump_enabled_p ())
5810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5811 "no optab.\n");
5812 return false;
5814 vec_mode = TYPE_MODE (vectype);
5815 icode = (int) optab_handler (optab, vec_mode);
5816 if (icode == CODE_FOR_nothing)
5818 if (dump_enabled_p ())
5819 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5820 "op not supported by target.\n");
5821 /* Check only during analysis. */
5822 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5823 || (!vec_stmt
5824 && !vect_worthwhile_without_simd_p (vinfo, code)))
5825 return false;
5826 if (dump_enabled_p ())
5827 dump_printf_loc (MSG_NOTE, vect_location,
5828 "proceeding using word mode.\n");
5831 /* Worthwhile without SIMD support? Check only during analysis. */
5832 if (!vec_stmt
5833 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5834 && !vect_worthwhile_without_simd_p (vinfo, code))
5836 if (dump_enabled_p ())
5837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5838 "not worthwhile without SIMD support.\n");
5839 return false;
5842 if (!vec_stmt) /* transformation not required. */
5844 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5845 DUMP_VECT_SCOPE ("vectorizable_shift");
5846 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5847 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5848 return true;
5851 /* Transform. */
5853 if (dump_enabled_p ())
5854 dump_printf_loc (MSG_NOTE, vect_location,
5855 "transform binary/unary operation.\n");
5857 if (incompatible_op1_vectype_p && !slp_node)
5859 op1 = fold_convert (TREE_TYPE (vectype), op1);
5860 if (dt[1] != vect_constant_def)
5861 op1 = vect_init_vector (vinfo, stmt_info, op1,
5862 TREE_TYPE (vectype), NULL);
5865 /* Handle def. */
5866 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5868 prev_stmt_info = NULL;
5869 for (j = 0; j < ncopies; j++)
5871 /* Handle uses. */
5872 if (j == 0)
5874 if (scalar_shift_arg)
5876 /* Vector shl and shr insn patterns can be defined with scalar
5877 operand 2 (shift operand). In this case, use constant or loop
5878 invariant op1 directly, without extending it to vector mode
5879 first. */
5880 optab_op2_mode = insn_data[icode].operand[2].mode;
5881 if (!VECTOR_MODE_P (optab_op2_mode))
5883 if (dump_enabled_p ())
5884 dump_printf_loc (MSG_NOTE, vect_location,
5885 "operand 1 using scalar mode.\n");
5886 vec_oprnd1 = op1;
5887 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5888 vec_oprnds1.quick_push (vec_oprnd1);
5889 if (slp_node)
5891 /* Store vec_oprnd1 for every vector stmt to be created
5892 for SLP_NODE. We check during the analysis that all
5893 the shift arguments are the same.
5894 TODO: Allow different constants for different vector
5895 stmts generated for an SLP instance. */
5896 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5897 vec_oprnds1.quick_push (vec_oprnd1);
5901 else if (slp_node && incompatible_op1_vectype_p)
5903 if (was_scalar_shift_arg)
5905 /* If the argument was the same in all lanes, create
5906 the correctly typed vector shift amount directly. */
5907 op1 = fold_convert (TREE_TYPE (vectype), op1);
5908 op1 = vect_init_vector (vinfo, stmt_info,
5909 op1, TREE_TYPE (vectype),
5910 !loop_vinfo ? gsi : NULL);
5911 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5912 !loop_vinfo ? gsi : NULL);
5913 vec_oprnds1.create (slp_node->vec_stmts_size);
5914 for (k = 0; k < slp_node->vec_stmts_size; k++)
5915 vec_oprnds1.quick_push (vec_oprnd1);
5917 else if (dt[1] == vect_constant_def)
5919 /* Convert the scalar constant shift amounts in-place. */
5920 slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
5921 gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
5922 for (unsigned i = 0;
5923 i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
5925 SLP_TREE_SCALAR_OPS (shift)[i]
5926 = fold_convert (TREE_TYPE (vectype),
5927 SLP_TREE_SCALAR_OPS (shift)[i]);
5928 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
5929 == INTEGER_CST));
5932 else
5933 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5936 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5937 (a special case for certain kinds of vector shifts); otherwise,
5938 operand 1 should be of a vector type (the usual case). */
5939 if (vec_oprnd1)
5940 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info,
5941 &vec_oprnds0, NULL, slp_node);
5942 else
5943 vect_get_vec_defs (vinfo, op0, op1, stmt_info,
5944 &vec_oprnds0, &vec_oprnds1, slp_node);
5946 else
5947 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5949 /* Arguments are ready. Create the new vector stmt. */
5950 stmt_vec_info new_stmt_info = NULL;
5951 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5953 vop1 = vec_oprnds1[i];
5954 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5955 new_temp = make_ssa_name (vec_dest, new_stmt);
5956 gimple_assign_set_lhs (new_stmt, new_temp);
5957 new_stmt_info
5958 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5959 if (slp_node)
5960 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5963 if (slp_node)
5964 continue;
5966 if (j == 0)
5967 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5968 else
5969 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5970 prev_stmt_info = new_stmt_info;
5973 vec_oprnds0.release ();
5974 vec_oprnds1.release ();
5976 return true;
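/* Illustrative example (an editor's sketch, not from the sources) of the
   scalar_shift_arg distinction made above:

     for (i = 0; i < n; i++)
       a[i] = b[i] << k;      // invariant amount: vector/scalar shift

     for (i = 0; i < n; i++)
       a[i] = b[i] << c[i];   // per-element amount: vector/vector shift

   In the first loop the shift count K is the same for every element, so
   the optab_scalar variant can be used and K may stay a scalar operand
   when the insn pattern allows it; in the second loop the count varies
   per element, so only the optab_vector variant applies and C[i] is
   vectorized like any other operand.  */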
5980 /* Function vectorizable_operation.
5982 Check if STMT_INFO performs a binary, unary or ternary operation that can
5983 be vectorized.
5984 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5985 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5986 Return true if STMT_INFO is vectorizable in this way. */
5988 static bool
5989 vectorizable_operation (vec_info *vinfo,
5990 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5991 stmt_vec_info *vec_stmt, slp_tree slp_node,
5992 stmt_vector_for_cost *cost_vec)
5994 tree vec_dest;
5995 tree scalar_dest;
5996 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5997 tree vectype;
5998 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5999 enum tree_code code, orig_code;
6000 machine_mode vec_mode;
6001 tree new_temp;
6002 int op_type;
6003 optab optab;
6004 bool target_support_p;
6005 enum vect_def_type dt[3]
6006 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6007 int ndts = 3;
6008 stmt_vec_info prev_stmt_info;
6009 poly_uint64 nunits_in;
6010 poly_uint64 nunits_out;
6011 tree vectype_out;
6012 int ncopies, vec_num;
6013 int j, i;
6014 vec<tree> vec_oprnds0 = vNULL;
6015 vec<tree> vec_oprnds1 = vNULL;
6016 vec<tree> vec_oprnds2 = vNULL;
6017 tree vop0, vop1, vop2;
6018 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6020 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6021 return false;
6023 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6024 && ! vec_stmt)
6025 return false;
6027 /* Is STMT a vectorizable binary/unary operation? */
6028 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6029 if (!stmt)
6030 return false;
6032 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6033 return false;
6035 orig_code = code = gimple_assign_rhs_code (stmt);
6037 /* Shifts are handled in vectorizable_shift. */
6038 if (code == LSHIFT_EXPR
6039 || code == RSHIFT_EXPR
6040 || code == LROTATE_EXPR
6041 || code == RROTATE_EXPR)
6042 return false;
6044 /* Comparisons are handled in vectorizable_comparison. */
6045 if (TREE_CODE_CLASS (code) == tcc_comparison)
6046 return false;
6048 /* Conditions are handled in vectorizable_condition. */
6049 if (code == COND_EXPR)
6050 return false;
6052 /* For pointer addition and subtraction, we should use the normal
6053 plus and minus for the vector operation. */
6054 if (code == POINTER_PLUS_EXPR)
6055 code = PLUS_EXPR;
6056 if (code == POINTER_DIFF_EXPR)
6057 code = MINUS_EXPR;
6059 /* Support only unary, binary or ternary operations. */
6060 op_type = TREE_CODE_LENGTH (code);
6061 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6063 if (dump_enabled_p ())
6064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6065 "num. args = %d (not unary/binary/ternary op).\n",
6066 op_type);
6067 return false;
6070 scalar_dest = gimple_assign_lhs (stmt);
6071 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6073 /* Most operations cannot handle bit-precision types without extra
6074 truncations. */
6075 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6076 if (!mask_op_p
6077 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6078 /* Exception are bitwise binary operations. */
6079 && code != BIT_IOR_EXPR
6080 && code != BIT_XOR_EXPR
6081 && code != BIT_AND_EXPR)
6083 if (dump_enabled_p ())
6084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6085 "bit-precision arithmetic not supported.\n");
6086 return false;
6089 op0 = gimple_assign_rhs1 (stmt);
6090 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
6092 if (dump_enabled_p ())
6093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6094 "use not simple.\n");
6095 return false;
6097 /* If op0 is an external or constant def, infer the vector type
6098 from the scalar type. */
6099 if (!vectype)
6101 /* For a boolean type we cannot determine the vectype from an
6102 invariant value (we don't know whether it is a vector
6103 of booleans or a vector of integers). We use the output
6104 vectype because operations on booleans don't change the
6105 type. */
6106 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6108 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6110 if (dump_enabled_p ())
6111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6112 "not supported operation on bool value.\n");
6113 return false;
6115 vectype = vectype_out;
6117 else
6118 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6119 slp_node);
6121 if (vec_stmt)
6122 gcc_assert (vectype);
6123 if (!vectype)
6125 if (dump_enabled_p ())
6126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6127 "no vectype for scalar type %T\n",
6128 TREE_TYPE (op0));
6130 return false;
6133 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6134 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6135 if (maybe_ne (nunits_out, nunits_in))
6136 return false;
6138 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6139 if (op_type == binary_op || op_type == ternary_op)
6141 op1 = gimple_assign_rhs2 (stmt);
6142 if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2))
6144 if (dump_enabled_p ())
6145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6146 "use not simple.\n");
6147 return false;
6150 if (op_type == ternary_op)
6152 op2 = gimple_assign_rhs3 (stmt);
6153 if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3))
6155 if (dump_enabled_p ())
6156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6157 "use not simple.\n");
6158 return false;
6162 /* Multiple types in SLP are handled by creating the appropriate number of
6163 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6164 case of SLP. */
6165 if (slp_node)
6167 ncopies = 1;
6168 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6170 else
6172 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6173 vec_num = 1;
6176 gcc_assert (ncopies >= 1);
6178 /* Reject attempts to combine mask types with nonmask types, e.g. if
6179 we have an AND between a (nonmask) boolean loaded from memory and
6180 a (mask) boolean result of a comparison.
6182 TODO: We could easily fix these cases up using pattern statements. */
6183 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6184 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6185 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6187 if (dump_enabled_p ())
6188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6189 "mixed mask and nonmask vector types\n");
6190 return false;
6193 /* Supportable by target? */
6195 vec_mode = TYPE_MODE (vectype);
6196 if (code == MULT_HIGHPART_EXPR)
6197 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6198 else
6200 optab = optab_for_tree_code (code, vectype, optab_default);
6201 if (!optab)
6203 if (dump_enabled_p ())
6204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6205 "no optab.\n");
6206 return false;
6208 target_support_p = (optab_handler (optab, vec_mode)
6209 != CODE_FOR_nothing);
6212 if (!target_support_p)
6214 if (dump_enabled_p ())
6215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6216 "op not supported by target.\n");
6217 /* Check only during analysis. */
6218 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6219 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6220 return false;
6221 if (dump_enabled_p ())
6222 dump_printf_loc (MSG_NOTE, vect_location,
6223 "proceeding using word mode.\n");
6226 /* Worthwhile without SIMD support? Check only during analysis. */
6227 if (!VECTOR_MODE_P (vec_mode)
6228 && !vec_stmt
6229 && !vect_worthwhile_without_simd_p (vinfo, code))
6231 if (dump_enabled_p ())
6232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6233 "not worthwhile without SIMD support.\n");
6234 return false;
6237 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6238 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6239 internal_fn cond_fn = get_conditional_internal_fn (code);
6241 if (!vec_stmt) /* transformation not required. */
6243 /* If this operation is part of a reduction, a fully-masked loop
6244 should only change the active lanes of the reduction chain,
6245 keeping the inactive lanes as-is. */
6246 if (loop_vinfo
6247 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6248 && reduc_idx >= 0)
6250 if (cond_fn == IFN_LAST
6251 || !direct_internal_fn_supported_p (cond_fn, vectype,
6252 OPTIMIZE_FOR_SPEED))
6254 if (dump_enabled_p ())
6255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6256 "can't use a fully-masked loop because no"
6257 " conditional operation is available.\n");
6258 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6260 else
6261 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6262 vectype, NULL);
6265 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6266 DUMP_VECT_SCOPE ("vectorizable_operation");
6267 vect_model_simple_cost (vinfo, stmt_info,
6268 ncopies, dt, ndts, slp_node, cost_vec);
6269 return true;
6272 /* Transform. */
6274 if (dump_enabled_p ())
6275 dump_printf_loc (MSG_NOTE, vect_location,
6276 "transform binary/unary operation.\n");
6278 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6280 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6281 vectors with unsigned elements, but the result is signed. So, we
6282 need to compute the MINUS_EXPR into a vectype temporary and
6283 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6284 tree vec_cvt_dest = NULL_TREE;
6285 if (orig_code == POINTER_DIFF_EXPR)
6287 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6288 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6290 /* Handle def. */
6291 else
6292 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6294 /* In case the vectorization factor (VF) is bigger than the number
6295 of elements that we can fit in a vectype (nunits), we have to generate
6296 more than one vector stmt, i.e., we need to "unroll" the
6297 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6298 from one copy of the vector stmt to the next, in the field
6299 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6300 stages to find the correct vector defs to be used when vectorizing
6301 stmts that use the defs of the current stmt. The example below
6302 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6303 we need to create 4 vectorized stmts):
6305 before vectorization:
6306 RELATED_STMT VEC_STMT
6307 S1: x = memref - -
6308 S2: z = x + 1 - -
6310 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6311 there):
6312 RELATED_STMT VEC_STMT
6313 VS1_0: vx0 = memref0 VS1_1 -
6314 VS1_1: vx1 = memref1 VS1_2 -
6315 VS1_2: vx2 = memref2 VS1_3 -
6316 VS1_3: vx3 = memref3 - -
6317 S1: x = load - VS1_0
6318 S2: z = x + 1 - -
6320 step2: vectorize stmt S2 (done here):
6321 To vectorize stmt S2 we first need to find the relevant vector
6322 def for the first operand 'x'. This is, as usual, obtained from
6323 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6324 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6325 relevant vector def 'vx0'. Having found 'vx0' we can generate
6326 the vector stmt VS2_0, and as usual, record it in the
6327 STMT_VINFO_VEC_STMT of stmt S2.
6328 When creating the second copy (VS2_1), we obtain the relevant vector
6329 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6330 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6331 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6332 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6333 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6334 chain of stmts and pointers:
6335 RELATED_STMT VEC_STMT
6336 VS1_0: vx0 = memref0 VS1_1 -
6337 VS1_1: vx1 = memref1 VS1_2 -
6338 VS1_2: vx2 = memref2 VS1_3 -
6339 VS1_3: vx3 = memref3 - -
6340 S1: x = load - VS1_0
6341 VS2_0: vz0 = vx0 + v1 VS2_1 -
6342 VS2_1: vz1 = vx1 + v1 VS2_2 -
6343 VS2_2: vz2 = vx2 + v1 VS2_3 -
6344 VS2_3: vz3 = vx3 + v1 - -
6345 S2: z = x + 1 - VS2_0 */
6347 prev_stmt_info = NULL;
6348 for (j = 0; j < ncopies; j++)
6350 /* Handle uses. */
6351 if (j == 0)
6353 if (op_type == binary_op)
6354 vect_get_vec_defs (vinfo, op0, op1, stmt_info,
6355 &vec_oprnds0, &vec_oprnds1, slp_node);
6356 else if (op_type == ternary_op)
6358 if (slp_node)
6360 auto_vec<vec<tree> > vec_defs(3);
6361 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
6362 vec_oprnds0 = vec_defs[0];
6363 vec_oprnds1 = vec_defs[1];
6364 vec_oprnds2 = vec_defs[2];
6366 else
6368 vect_get_vec_defs (vinfo, op0, op1, stmt_info, &vec_oprnds0,
6369 &vec_oprnds1, NULL);
6370 vect_get_vec_defs (vinfo, op2, NULL_TREE, stmt_info,
6371 &vec_oprnds2, NULL, NULL);
6374 else
6375 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info, &vec_oprnds0,
6376 NULL, slp_node);
6378 else
6380 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6381 if (op_type == ternary_op)
6383 tree vec_oprnd = vec_oprnds2.pop ();
6384 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6385 vec_oprnd));
6389 /* Arguments are ready. Create the new vector stmt. */
6390 stmt_vec_info new_stmt_info = NULL;
6391 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6393 vop1 = ((op_type == binary_op || op_type == ternary_op)
6394 ? vec_oprnds1[i] : NULL_TREE);
6395 vop2 = ((op_type == ternary_op)
6396 ? vec_oprnds2[i] : NULL_TREE);
6397 if (masked_loop_p && reduc_idx >= 0)
6399 /* Perform the operation on active elements only and take
6400 inactive elements from the reduction chain input. */
6401 gcc_assert (!vop2);
6402 vop2 = reduc_idx == 1 ? vop1 : vop0;
6403 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6404 vectype, i * ncopies + j);
6405 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6406 vop0, vop1, vop2);
6407 new_temp = make_ssa_name (vec_dest, call);
6408 gimple_call_set_lhs (call, new_temp);
6409 gimple_call_set_nothrow (call, true);
6410 new_stmt_info
6411 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6413 else
6415 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6416 vop0, vop1, vop2);
6417 new_temp = make_ssa_name (vec_dest, new_stmt);
6418 gimple_assign_set_lhs (new_stmt, new_temp);
6419 new_stmt_info
6420 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6421 if (vec_cvt_dest)
6423 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6424 gassign *new_stmt
6425 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6426 new_temp);
6427 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6428 gimple_assign_set_lhs (new_stmt, new_temp);
6429 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
6430 new_stmt, gsi);
6433 if (slp_node)
6434 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6437 if (slp_node)
6438 continue;
6440 if (j == 0)
6441 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6442 else
6443 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6444 prev_stmt_info = new_stmt_info;
6447 vec_oprnds0.release ();
6448 vec_oprnds1.release ();
6449 vec_oprnds2.release ();
6451 return true;
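/* Per-element model of the conditional internal function call built above
   for masked reductions (an illustrative sketch, not the actual RTL
   expansion):

     for (lane = 0; lane < nunits; lane++)
       res[lane] = mask[lane] ? vop0[lane] OP vop1[lane] : vop2[lane];

   where VOP2 is the reduction-chain input chosen via REDUC_IDX, so the
   inactive lanes of a fully-masked loop pass the running reduction value
   through unchanged, as required by the analysis check earlier in this
   function.  */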
6454 /* A helper function to ensure data reference DR_INFO's base alignment. */
6456 static void
6457 ensure_base_align (dr_vec_info *dr_info)
6459 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6460 return;
6462 if (dr_info->base_misaligned)
6464 tree base_decl = dr_info->base_decl;
6466 // We should only be able to increase the alignment of a base object if
6467 // we know what its new alignment should be at compile time.
6468 unsigned HOST_WIDE_INT align_base_to =
6469 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6471 if (decl_in_symtab_p (base_decl))
6472 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6473 else if (DECL_ALIGN (base_decl) < align_base_to)
6475 SET_DECL_ALIGN (base_decl, align_base_to);
6476 DECL_USER_ALIGN (base_decl) = 1;
6478 dr_info->base_misaligned = false;
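/* Illustrative example (an editor's sketch, not from the sources): if the
   target wants 16-byte alignment for its vector accesses
   (DR_TARGET_ALIGNMENT == 16) and the base object is

     static int data[256];   // default alignment smaller than 16 bytes

   then the code above raises DECL_ALIGN of DATA to 16 * BITS_PER_UNIT,
   either through its symbol table node or directly via SET_DECL_ALIGN,
   so that the vectorized loads and stores of DATA can be emitted as
   aligned accesses.  */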
6483 /* Function get_group_alias_ptr_type.
6485 Return the alias type for the group starting at FIRST_STMT_INFO. */
6487 static tree
6488 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6490 struct data_reference *first_dr, *next_dr;
6492 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6493 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6494 while (next_stmt_info)
6496 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6497 if (get_alias_set (DR_REF (first_dr))
6498 != get_alias_set (DR_REF (next_dr)))
6500 if (dump_enabled_p ())
6501 dump_printf_loc (MSG_NOTE, vect_location,
6502 "conflicting alias set types.\n");
6503 return ptr_type_node;
6505 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6507 return reference_alias_ptr_type (DR_REF (first_dr));
6511 /* Function scan_operand_equal_p.
6513 Helper function for check_scan_store. Compare two references
6514 with .GOMP_SIMD_LANE bases. */
6516 static bool
6517 scan_operand_equal_p (tree ref1, tree ref2)
6519 tree ref[2] = { ref1, ref2 };
6520 poly_int64 bitsize[2], bitpos[2];
6521 tree offset[2], base[2];
6522 for (int i = 0; i < 2; ++i)
6524 machine_mode mode;
6525 int unsignedp, reversep, volatilep = 0;
6526 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6527 &offset[i], &mode, &unsignedp,
6528 &reversep, &volatilep);
6529 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6530 return false;
6531 if (TREE_CODE (base[i]) == MEM_REF
6532 && offset[i] == NULL_TREE
6533 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6535 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6536 if (is_gimple_assign (def_stmt)
6537 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6538 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6539 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6541 if (maybe_ne (mem_ref_offset (base[i]), 0))
6542 return false;
6543 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6544 offset[i] = gimple_assign_rhs2 (def_stmt);
6549 if (!operand_equal_p (base[0], base[1], 0))
6550 return false;
6551 if (maybe_ne (bitsize[0], bitsize[1]))
6552 return false;
6553 if (offset[0] != offset[1])
6555 if (!offset[0] || !offset[1])
6556 return false;
6557 if (!operand_equal_p (offset[0], offset[1], 0))
6559 tree step[2];
6560 for (int i = 0; i < 2; ++i)
6562 step[i] = integer_one_node;
6563 if (TREE_CODE (offset[i]) == SSA_NAME)
6565 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6566 if (is_gimple_assign (def_stmt)
6567 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6568 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6569 == INTEGER_CST))
6571 step[i] = gimple_assign_rhs2 (def_stmt);
6572 offset[i] = gimple_assign_rhs1 (def_stmt);
6575 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6577 step[i] = TREE_OPERAND (offset[i], 1);
6578 offset[i] = TREE_OPERAND (offset[i], 0);
6580 tree rhs1 = NULL_TREE;
6581 if (TREE_CODE (offset[i]) == SSA_NAME)
6583 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6584 if (gimple_assign_cast_p (def_stmt))
6585 rhs1 = gimple_assign_rhs1 (def_stmt);
6587 else if (CONVERT_EXPR_P (offset[i]))
6588 rhs1 = TREE_OPERAND (offset[i], 0);
6589 if (rhs1
6590 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6591 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6592 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6593 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6594 offset[i] = rhs1;
6596 if (!operand_equal_p (offset[0], offset[1], 0)
6597 || !operand_equal_p (step[0], step[1], 0))
6598 return false;
6601 return true;
6605 enum scan_store_kind {
6606 /* Normal permutation. */
6607 scan_store_kind_perm,
6609 /* Whole vector left shift permutation with zero init. */
6610 scan_store_kind_lshift_zero,
6612 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6613 scan_store_kind_lshift_cond
6616 /* Function scan_store_can_perm_p.
6618 Verify if we can perform the needed permutations or whole vector shifts.
6619 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6620 USE_WHOLE_VECTOR is a vector of enum scan_store_kind saying which
6621 operation to do at each step. */
6623 static int
6624 scan_store_can_perm_p (tree vectype, tree init,
6625 vec<enum scan_store_kind> *use_whole_vector = NULL)
6627 enum machine_mode vec_mode = TYPE_MODE (vectype);
6628 unsigned HOST_WIDE_INT nunits;
6629 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6630 return -1;
6631 int units_log2 = exact_log2 (nunits);
6632 if (units_log2 <= 0)
6633 return -1;
6635 int i;
6636 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6637 for (i = 0; i <= units_log2; ++i)
6639 unsigned HOST_WIDE_INT j, k;
6640 enum scan_store_kind kind = scan_store_kind_perm;
6641 vec_perm_builder sel (nunits, nunits, 1);
6642 sel.quick_grow (nunits);
6643 if (i == units_log2)
6645 for (j = 0; j < nunits; ++j)
6646 sel[j] = nunits - 1;
6648 else
6650 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6651 sel[j] = j;
6652 for (k = 0; j < nunits; ++j, ++k)
6653 sel[j] = nunits + k;
6655 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6656 if (!can_vec_perm_const_p (vec_mode, indices))
6658 if (i == units_log2)
6659 return -1;
6661 if (whole_vector_shift_kind == scan_store_kind_perm)
6663 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6664 return -1;
6665 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6666 /* Whole vector shifts shift in zeros, so if init is an all-zeros
6667 constant, there is no need to do anything further. */
6668 if ((TREE_CODE (init) != INTEGER_CST
6669 && TREE_CODE (init) != REAL_CST)
6670 || !initializer_zerop (init))
6672 tree masktype = truth_type_for (vectype);
6673 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6674 return -1;
6675 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6678 kind = whole_vector_shift_kind;
6680 if (use_whole_vector)
6682 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6683 use_whole_vector->safe_grow_cleared (i);
6684 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6685 use_whole_vector->safe_push (kind);
6689 return units_log2;
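/* Worked example (derived from the loop above): for nunits == 8 the
   routine returns units_log2 == 3 after checking the permutations

     i == 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }
     i == 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }
     i == 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }
     i == 3:  { 7, 7, 7, 7, 7, 7, 7, 7 }

   which are exactly the VEC_PERM_EXPR masks used in the inclusive and
   exclusive scan sequences shown in the comment inside check_scan_store
   below.  When one of the first three masks is not directly supported,
   a whole-vector left shift is recorded in USE_WHOLE_VECTOR instead,
   optionally followed by a VEC_COND_EXPR when the initializer is not an
   all-zeros constant.  */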
6693 /* Function check_scan_store.
6695 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6697 static bool
6698 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6699 enum vect_def_type rhs_dt, bool slp, tree mask,
6700 vect_memory_access_type memory_access_type)
6702 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6703 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6704 tree ref_type;
6706 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6707 if (slp
6708 || mask
6709 || memory_access_type != VMAT_CONTIGUOUS
6710 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6711 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6712 || loop_vinfo == NULL
6713 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6714 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6715 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6716 || !integer_zerop (DR_INIT (dr_info->dr))
6717 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6718 || !alias_sets_conflict_p (get_alias_set (vectype),
6719 get_alias_set (TREE_TYPE (ref_type))))
6721 if (dump_enabled_p ())
6722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6723 "unsupported OpenMP scan store.\n");
6724 return false;
6727 /* We need to pattern match code built by OpenMP lowering and simplified
6728 by subsequent optimizations into something we can handle.
6729 #pragma omp simd reduction(inscan,+:r)
6730 for (...)
6732 r += something ();
6733 #pragma omp scan inclusive (r)
6734 use (r);
6736 shall have body with:
6737 // Initialization for input phase, store the reduction initializer:
6738 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6739 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6740 D.2042[_21] = 0;
6741 // Actual input phase:
6743 r.0_5 = D.2042[_20];
6744 _6 = _4 + r.0_5;
6745 D.2042[_20] = _6;
6746 // Initialization for scan phase:
6747 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6748 _26 = D.2043[_25];
6749 _27 = D.2042[_25];
6750 _28 = _26 + _27;
6751 D.2043[_25] = _28;
6752 D.2042[_25] = _28;
6753 // Actual scan phase:
6755 r.1_8 = D.2042[_20];
6757 The "omp simd array" variable D.2042 holds the privatized copy used
6758 inside of the loop and D.2043 is another one that holds copies of
6759 the current original list item. The separate GOMP_SIMD_LANE ifn
6760 kinds are there in order to allow optimizing the initializer store
6761 and combiner sequence, e.g. if it is originally some C++ish user
6762 defined reduction, but allow the vectorizer to pattern recognize it
6763 and turn it into the appropriate vectorized scan.
6765 For exclusive scan, this is slightly different:
6766 #pragma omp simd reduction(inscan,+:r)
6767 for (...)
6769 use (r);
6770 #pragma omp scan exclusive (r)
6771 r += something ();
6773 shall have body with:
6774 // Initialization for input phase, store the reduction initializer:
6775 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6776 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6777 D.2042[_21] = 0;
6778 // Actual input phase:
6780 r.0_5 = D.2042[_20];
6781 _6 = _4 + r.0_5;
6782 D.2042[_20] = _6;
6783 // Initialization for scan phase:
6784 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6785 _26 = D.2043[_25];
6786 D.2044[_25] = _26;
6787 _27 = D.2042[_25];
6788 _28 = _26 + _27;
6789 D.2043[_25] = _28;
6790 // Actual scan phase:
6792 r.1_8 = D.2044[_20];
6793 ... */
6795 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6797 /* Match the D.2042[_21] = 0; store above. Just require that
6798 it is a constant or external definition store. */
6799 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6801 fail_init:
6802 if (dump_enabled_p ())
6803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6804 "unsupported OpenMP scan initializer store.\n");
6805 return false;
6808 if (! loop_vinfo->scan_map)
6809 loop_vinfo->scan_map = new hash_map<tree, tree>;
6810 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6811 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6812 if (cached)
6813 goto fail_init;
6814 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6816 /* These stores can be vectorized normally. */
6817 return true;
6820 if (rhs_dt != vect_internal_def)
6822 fail:
6823 if (dump_enabled_p ())
6824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6825 "unsupported OpenMP scan combiner pattern.\n");
6826 return false;
6829 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6830 tree rhs = gimple_assign_rhs1 (stmt);
6831 if (TREE_CODE (rhs) != SSA_NAME)
6832 goto fail;
6834 gimple *other_store_stmt = NULL;
6835 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6836 bool inscan_var_store
6837 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6839 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6841 if (!inscan_var_store)
6843 use_operand_p use_p;
6844 imm_use_iterator iter;
6845 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6847 gimple *use_stmt = USE_STMT (use_p);
6848 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6849 continue;
6850 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6851 || !is_gimple_assign (use_stmt)
6852 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6853 || other_store_stmt
6854 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6855 goto fail;
6856 other_store_stmt = use_stmt;
6858 if (other_store_stmt == NULL)
6859 goto fail;
6860 rhs = gimple_assign_lhs (other_store_stmt);
6861 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6862 goto fail;
6865 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6867 use_operand_p use_p;
6868 imm_use_iterator iter;
6869 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6871 gimple *use_stmt = USE_STMT (use_p);
6872 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6873 continue;
6874 if (other_store_stmt)
6875 goto fail;
6876 other_store_stmt = use_stmt;
6879 else
6880 goto fail;
6882 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6883 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6884 || !is_gimple_assign (def_stmt)
6885 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6886 goto fail;
6888 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6889 /* For pointer addition, we should use the normal plus for the vector
6890 operation. */
6891 switch (code)
6893 case POINTER_PLUS_EXPR:
6894 code = PLUS_EXPR;
6895 break;
6896 case MULT_HIGHPART_EXPR:
6897 goto fail;
6898 default:
6899 break;
6901 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6902 goto fail;
6904 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6905 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6906 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6907 goto fail;
6909 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6910 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6911 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6912 || !gimple_assign_load_p (load1_stmt)
6913 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6914 || !gimple_assign_load_p (load2_stmt))
6915 goto fail;
6917 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6918 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6919 if (load1_stmt_info == NULL
6920 || load2_stmt_info == NULL
6921 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6922 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6923 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6924 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6925 goto fail;
6927 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6929 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6930 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6931 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6932 goto fail;
6933 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6934 tree lrhs;
6935 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6936 lrhs = rhs1;
6937 else
6938 lrhs = rhs2;
6939 use_operand_p use_p;
6940 imm_use_iterator iter;
6941 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6943 gimple *use_stmt = USE_STMT (use_p);
6944 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6945 continue;
6946 if (other_store_stmt)
6947 goto fail;
6948 other_store_stmt = use_stmt;
6952 if (other_store_stmt == NULL)
6953 goto fail;
6954 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6955 || !gimple_store_p (other_store_stmt))
6956 goto fail;
6958 stmt_vec_info other_store_stmt_info
6959 = loop_vinfo->lookup_stmt (other_store_stmt);
6960 if (other_store_stmt_info == NULL
6961 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6962 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6963 goto fail;
6965 gimple *stmt1 = stmt;
6966 gimple *stmt2 = other_store_stmt;
6967 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6968 std::swap (stmt1, stmt2);
6969 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6970 gimple_assign_rhs1 (load2_stmt)))
6972 std::swap (rhs1, rhs2);
6973 std::swap (load1_stmt, load2_stmt);
6974 std::swap (load1_stmt_info, load2_stmt_info);
6976 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6977 gimple_assign_rhs1 (load1_stmt)))
6978 goto fail;
6980 tree var3 = NULL_TREE;
6981 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6982 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6983 gimple_assign_rhs1 (load2_stmt)))
6984 goto fail;
6985 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6987 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6988 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6989 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6990 goto fail;
6991 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6992 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6993 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6994 || lookup_attribute ("omp simd inscan exclusive",
6995 DECL_ATTRIBUTES (var3)))
6996 goto fail;
6999 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7000 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7001 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7002 goto fail;
7004 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7005 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7006 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7007 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7008 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7009 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7010 goto fail;
7012 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7013 std::swap (var1, var2);
7015 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7017 if (!lookup_attribute ("omp simd inscan exclusive",
7018 DECL_ATTRIBUTES (var1)))
7019 goto fail;
7020 var1 = var3;
7023 if (loop_vinfo->scan_map == NULL)
7024 goto fail;
7025 tree *init = loop_vinfo->scan_map->get (var1);
7026 if (init == NULL)
7027 goto fail;
7029 /* The IL is as expected, now check if we can actually vectorize it.
7030 Inclusive scan:
7031 _26 = D.2043[_25];
7032 _27 = D.2042[_25];
7033 _28 = _26 + _27;
7034 D.2043[_25] = _28;
7035 D.2042[_25] = _28;
7036 should be vectorized as (where _40 is the vectorized rhs
7037 from the D.2042[_21] = 0; store):
7038 _30 = MEM <vector(8) int> [(int *)&D.2043];
7039 _31 = MEM <vector(8) int> [(int *)&D.2042];
7040 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7041 _33 = _31 + _32;
7042 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7043 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7044 _35 = _33 + _34;
7045 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7046 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7047 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7048 _37 = _35 + _36;
7049 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7050 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7051 _38 = _30 + _37;
7052 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7053 MEM <vector(8) int> [(int *)&D.2043] = _39;
7054 MEM <vector(8) int> [(int *)&D.2042] = _38;
7055 Exclusive scan:
7056 _26 = D.2043[_25];
7057 D.2044[_25] = _26;
7058 _27 = D.2042[_25];
7059 _28 = _26 + _27;
7060 D.2043[_25] = _28;
7061 should be vectorized as (where _40 is the vectorized rhs
7062 from the D.2042[_21] = 0; store):
7063 _30 = MEM <vector(8) int> [(int *)&D.2043];
7064 _31 = MEM <vector(8) int> [(int *)&D.2042];
7065 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7066 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7067 _34 = _32 + _33;
7068 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7069 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7070 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7071 _36 = _34 + _35;
7072 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7073 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7074 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7075 _38 = _36 + _37;
7076 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7077 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7078 _39 = _30 + _38;
7079 _50 = _31 + _39;
7080 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7081 MEM <vector(8) int> [(int *)&D.2044] = _39;
7082 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7083 enum machine_mode vec_mode = TYPE_MODE (vectype);
7084 optab optab = optab_for_tree_code (code, vectype, optab_default);
7085 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7086 goto fail;
7088 int units_log2 = scan_store_can_perm_p (vectype, *init);
7089 if (units_log2 == -1)
7090 goto fail;
7092 return true;
7096 /* Function vectorizable_scan_store.
7098 Helper of vectorizable_store, with the same arguments as vectorizable_store.
7099 Handle only the transformation; the checking is done in check_scan_store. */
7101 static bool
7102 vectorizable_scan_store (vec_info *vinfo,
7103 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7104 stmt_vec_info *vec_stmt, int ncopies)
7106 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7107 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7108 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7109 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7111 if (dump_enabled_p ())
7112 dump_printf_loc (MSG_NOTE, vect_location,
7113 "transform scan store. ncopies = %d\n", ncopies);
7115 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7116 tree rhs = gimple_assign_rhs1 (stmt);
7117 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7119 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7120 bool inscan_var_store
7121 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7123 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7125 use_operand_p use_p;
7126 imm_use_iterator iter;
7127 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7129 gimple *use_stmt = USE_STMT (use_p);
7130 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7131 continue;
7132 rhs = gimple_assign_lhs (use_stmt);
7133 break;
7137 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7138 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7139 if (code == POINTER_PLUS_EXPR)
7140 code = PLUS_EXPR;
7141 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7142 && commutative_tree_code (code));
7143 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7144 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7145 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7146 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7147 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7148 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7149 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7150 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7151 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7152 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7153 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7155 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7157 std::swap (rhs1, rhs2);
7158 std::swap (var1, var2);
7159 std::swap (load1_dr_info, load2_dr_info);
7162 tree *init = loop_vinfo->scan_map->get (var1);
7163 gcc_assert (init);
7165 unsigned HOST_WIDE_INT nunits;
7166 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7167 gcc_unreachable ();
7168 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7169 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7170 gcc_assert (units_log2 > 0);
7171 auto_vec<tree, 16> perms;
7172 perms.quick_grow (units_log2 + 1);
7173 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7174 for (int i = 0; i <= units_log2; ++i)
7176 unsigned HOST_WIDE_INT j, k;
7177 vec_perm_builder sel (nunits, nunits, 1);
7178 sel.quick_grow (nunits);
7179 if (i == units_log2)
7180 for (j = 0; j < nunits; ++j)
7181 sel[j] = nunits - 1;
7182 else
7184 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7185 sel[j] = j;
7186 for (k = 0; j < nunits; ++j, ++k)
7187 sel[j] = nunits + k;
7189 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7190 if (!use_whole_vector.is_empty ()
7191 && use_whole_vector[i] != scan_store_kind_perm)
7193 if (zero_vec == NULL_TREE)
7194 zero_vec = build_zero_cst (vectype);
7195 if (masktype == NULL_TREE
7196 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7197 masktype = truth_type_for (vectype);
7198 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7200 else
7201 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7204 stmt_vec_info prev_stmt_info = NULL;
7205 tree vec_oprnd1 = NULL_TREE;
7206 tree vec_oprnd2 = NULL_TREE;
7207 tree vec_oprnd3 = NULL_TREE;
7208 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7209 tree dataref_offset = build_int_cst (ref_type, 0);
7210 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7211 vectype, VMAT_CONTIGUOUS);
7212 tree ldataref_ptr = NULL_TREE;
7213 tree orig = NULL_TREE;
7214 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7215 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7216 for (int j = 0; j < ncopies; j++)
7218 stmt_vec_info new_stmt_info;
7219 if (j == 0)
7221 vec_oprnd1 = vect_get_vec_def_for_operand (vinfo, *init, stmt_info);
7222 if (ldataref_ptr == NULL)
7223 vec_oprnd2 = vect_get_vec_def_for_operand (vinfo, rhs1, stmt_info);
7224 vec_oprnd3 = vect_get_vec_def_for_operand (vinfo, rhs2, stmt_info);
7225 orig = vec_oprnd3;
7227 else
7229 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7230 if (ldataref_ptr == NULL)
7231 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7232 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7233 if (!inscan_var_store)
7234 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7237 if (ldataref_ptr)
7239 vec_oprnd2 = make_ssa_name (vectype);
7240 tree data_ref = fold_build2 (MEM_REF, vectype,
7241 unshare_expr (ldataref_ptr),
7242 dataref_offset);
7243 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7244 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7245 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7246 if (prev_stmt_info == NULL)
7247 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7248 else
7249 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7250 prev_stmt_info = new_stmt_info;
7253 tree v = vec_oprnd2;
7254 for (int i = 0; i < units_log2; ++i)
7256 tree new_temp = make_ssa_name (vectype);
7257 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7258 (zero_vec
7259 && (use_whole_vector[i]
7260 != scan_store_kind_perm))
7261 ? zero_vec : vec_oprnd1, v,
7262 perms[i]);
7263 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7264 if (prev_stmt_info == NULL)
7265 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7266 else
7267 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7268 prev_stmt_info = new_stmt_info;
7270 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7272 /* The whole-vector shift shifted in zero bits, but if *init
7273 is not initializer_zerop, we need to replace those elements
7274 with elements from vec_oprnd1. */
7275 tree_vector_builder vb (masktype, nunits, 1);
7276 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7277 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7278 ? boolean_false_node : boolean_true_node);
7280 tree new_temp2 = make_ssa_name (vectype);
7281 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7282 new_temp, vec_oprnd1);
7283 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
7284 g, gsi);
7285 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7286 prev_stmt_info = new_stmt_info;
7287 new_temp = new_temp2;
7290 /* For exclusive scan, perform the perms[i] permutation once
7291 more. */
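	  /* In the exclusive scan example shown earlier in this file, this
	     is what produces the back-to-back _32/_33 VEC_PERM_EXPRs with
	     the same selector before the first addition.  */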
7292 if (i == 0
7293 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7294 && v == vec_oprnd2)
7296 v = new_temp;
7297 --i;
7298 continue;
7301 tree new_temp2 = make_ssa_name (vectype);
7302 g = gimple_build_assign (new_temp2, code, v, new_temp);
7303 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7304 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7305 prev_stmt_info = new_stmt_info;
7307 v = new_temp2;
7310 tree new_temp = make_ssa_name (vectype);
7311 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7312 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7313 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7314 prev_stmt_info = new_stmt_info;
7316 tree last_perm_arg = new_temp;
7317 /* For exclusive scan, new_temp computed above is the exclusive scan
7318 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7319 of the last element into orig. */
7320 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7322 last_perm_arg = make_ssa_name (vectype);
7323 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7324 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7325 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7326 prev_stmt_info = new_stmt_info;
7329 orig = make_ssa_name (vectype);
7330 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7331 last_perm_arg, perms[units_log2]);
7332 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7333 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7334 prev_stmt_info = new_stmt_info;
7336 if (!inscan_var_store)
7338 tree data_ref = fold_build2 (MEM_REF, vectype,
7339 unshare_expr (dataref_ptr),
7340 dataref_offset);
7341 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7342 g = gimple_build_assign (data_ref, new_temp);
7343 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7344 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7345 prev_stmt_info = new_stmt_info;
7349 if (inscan_var_store)
7350 for (int j = 0; j < ncopies; j++)
7352 if (j != 0)
7353 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7355 tree data_ref = fold_build2 (MEM_REF, vectype,
7356 unshare_expr (dataref_ptr),
7357 dataref_offset);
7358 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7359 gimple *g = gimple_build_assign (data_ref, orig);
7360 stmt_vec_info new_stmt_info
7361 = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7362 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7363 prev_stmt_info = new_stmt_info;
7365 return true;
7369 /* Function vectorizable_store.
7371 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7372 that can be vectorized.
7373 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7374 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7375 Return true if STMT_INFO is vectorizable in this way. */
7377 static bool
7378 vectorizable_store (vec_info *vinfo,
7379 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7380 stmt_vec_info *vec_stmt, slp_tree slp_node,
7381 stmt_vector_for_cost *cost_vec)
7383 tree data_ref;
7384 tree op;
7385 tree vec_oprnd = NULL_TREE;
7386 tree elem_type;
7387 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7388 class loop *loop = NULL;
7389 machine_mode vec_mode;
7390 tree dummy;
7391 enum dr_alignment_support alignment_support_scheme;
7392 enum vect_def_type rhs_dt = vect_unknown_def_type;
7393 enum vect_def_type mask_dt = vect_unknown_def_type;
7394 stmt_vec_info prev_stmt_info = NULL;
7395 tree dataref_ptr = NULL_TREE;
7396 tree dataref_offset = NULL_TREE;
7397 gimple *ptr_incr = NULL;
7398 int ncopies;
7399 int j;
7400 stmt_vec_info first_stmt_info;
7401 bool grouped_store;
7402 unsigned int group_size, i;
7403 vec<tree> oprnds = vNULL;
7404 vec<tree> result_chain = vNULL;
7405 tree offset = NULL_TREE;
7406 vec<tree> vec_oprnds = vNULL;
7407 bool slp = (slp_node != NULL);
7408 unsigned int vec_num;
7409 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7410 tree aggr_type;
7411 gather_scatter_info gs_info;
7412 poly_uint64 vf;
7413 vec_load_store_type vls_type;
7414 tree ref_type;
7416 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7417 return false;
7419 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7420 && ! vec_stmt)
7421 return false;
7423 /* Is vectorizable store? */
7425 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7426 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7428 tree scalar_dest = gimple_assign_lhs (assign);
7429 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7430 && is_pattern_stmt_p (stmt_info))
7431 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7432 if (TREE_CODE (scalar_dest) != ARRAY_REF
7433 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7434 && TREE_CODE (scalar_dest) != INDIRECT_REF
7435 && TREE_CODE (scalar_dest) != COMPONENT_REF
7436 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7437 && TREE_CODE (scalar_dest) != REALPART_EXPR
7438 && TREE_CODE (scalar_dest) != MEM_REF)
7439 return false;
7441 else
7443 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7444 if (!call || !gimple_call_internal_p (call))
7445 return false;
7447 internal_fn ifn = gimple_call_internal_fn (call);
7448 if (!internal_store_fn_p (ifn))
7449 return false;
7451 if (slp_node != NULL)
7453 if (dump_enabled_p ())
7454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7455 "SLP of masked stores not supported.\n");
7456 return false;
7459 int mask_index = internal_fn_mask_index (ifn);
7460 if (mask_index >= 0)
7462 mask = gimple_call_arg (call, mask_index);
7463 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7464 &mask_vectype))
7465 return false;
7469 op = vect_get_store_rhs (stmt_info);
7471 /* Cannot have hybrid store SLP -- that would mean storing to the
7472 same location twice. */
7473 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7475 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7476 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7478 if (loop_vinfo)
7480 loop = LOOP_VINFO_LOOP (loop_vinfo);
7481 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7483 else
7484 vf = 1;
7486 /* Multiple types in SLP are handled by creating the appropriate number of
7487 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7488 case of SLP. */
7489 if (slp)
7490 ncopies = 1;
7491 else
7492 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7494 gcc_assert (ncopies >= 1);
7496 /* FORNOW. This restriction should be relaxed. */
7497 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7499 if (dump_enabled_p ())
7500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7501 "multiple types in nested loop.\n");
7502 return false;
7505 if (!vect_check_store_rhs (vinfo, stmt_info,
7506 op, &rhs_dt, &rhs_vectype, &vls_type))
7507 return false;
7509 elem_type = TREE_TYPE (vectype);
7510 vec_mode = TYPE_MODE (vectype);
7512 if (!STMT_VINFO_DATA_REF (stmt_info))
7513 return false;
7515 vect_memory_access_type memory_access_type;
7516 if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, vls_type,
7517 ncopies, &memory_access_type, &gs_info))
7518 return false;
7520 if (mask)
7522 if (memory_access_type == VMAT_CONTIGUOUS)
7524 if (!VECTOR_MODE_P (vec_mode)
7525 || !can_vec_mask_load_store_p (vec_mode,
7526 TYPE_MODE (mask_vectype), false))
7527 return false;
7529 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7530 && (memory_access_type != VMAT_GATHER_SCATTER
7531 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7533 if (dump_enabled_p ())
7534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7535 "unsupported access type for masked store.\n");
7536 return false;
7539 else
7541 /* FORNOW. In some cases can vectorize even if data-type not supported
7542 (e.g. - array initialization with 0). */
7543 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7544 return false;
7547 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7548 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7549 && memory_access_type != VMAT_GATHER_SCATTER
7550 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7551 if (grouped_store)
7553 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7554 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7555 group_size = DR_GROUP_SIZE (first_stmt_info);
7557 else
7559 first_stmt_info = stmt_info;
7560 first_dr_info = dr_info;
7561 group_size = vec_num = 1;
7564 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7566 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7567 memory_access_type))
7568 return false;
7571 if (!vec_stmt) /* transformation not required. */
7573 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7575 if (loop_vinfo
7576 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7577 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7578 memory_access_type, &gs_info, mask);
7580 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7581 vect_model_store_cost (vinfo, stmt_info, ncopies, rhs_dt,
7582 memory_access_type, vls_type, slp_node, cost_vec);
7583 return true;
7585 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7587 /* Transform. */
7589 ensure_base_align (dr_info);
7591 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7593 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7594 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7595 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7596 tree ptr, var, scale, vec_mask;
7597 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7598 tree mask_halfvectype = mask_vectype;
7599 edge pe = loop_preheader_edge (loop);
7600 gimple_seq seq;
7601 basic_block new_bb;
7602 enum { NARROW, NONE, WIDEN } modifier;
7603 poly_uint64 scatter_off_nunits
7604 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7606 if (known_eq (nunits, scatter_off_nunits))
7607 modifier = NONE;
7608 else if (known_eq (nunits * 2, scatter_off_nunits))
7610 modifier = WIDEN;
7612 /* Currently gathers and scatters are only supported for
7613 fixed-length vectors. */
7614 unsigned int count = scatter_off_nunits.to_constant ();
7615 vec_perm_builder sel (count, count, 1);
7616 for (i = 0; i < (unsigned int) count; ++i)
7617 sel.quick_push (i | (count / 2));
7619 vec_perm_indices indices (sel, 1, count);
7620 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7621 indices);
7622 gcc_assert (perm_mask != NULL_TREE);
7624 else if (known_eq (nunits, scatter_off_nunits * 2))
7626 modifier = NARROW;
7628 /* Currently gathers and scatters are only supported for
7629 fixed-length vectors. */
7630 unsigned int count = nunits.to_constant ();
7631 vec_perm_builder sel (count, count, 1);
7632 for (i = 0; i < (unsigned int) count; ++i)
7633 sel.quick_push (i | (count / 2));
7635 vec_perm_indices indices (sel, 2, count);
7636 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7637 gcc_assert (perm_mask != NULL_TREE);
7638 ncopies *= 2;
7640 if (mask)
7641 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7643 else
7644 gcc_unreachable ();
7646 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7647 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7648 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7649 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7650 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7651 scaletype = TREE_VALUE (arglist);
7653 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7654 && TREE_CODE (rettype) == VOID_TYPE);
7656 ptr = fold_convert (ptrtype, gs_info.base);
7657 if (!is_gimple_min_invariant (ptr))
7659 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7660 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7661 gcc_assert (!new_bb);
7664 if (mask == NULL_TREE)
7666 mask_arg = build_int_cst (masktype, -1);
7667 mask_arg = vect_init_vector (vinfo, stmt_info,
7668 mask_arg, masktype, NULL);
7671 scale = build_int_cst (scaletype, gs_info.scale);
7673 prev_stmt_info = NULL;
7674 for (j = 0; j < ncopies; ++j)
7676 if (j == 0)
7678 src = vec_oprnd1 = vect_get_vec_def_for_operand (vinfo,
7679 op, stmt_info);
7680 op = vec_oprnd0 = vect_get_vec_def_for_operand (vinfo,
7681 gs_info.offset,
7682 stmt_info);
7683 if (mask)
7684 mask_op = vec_mask = vect_get_vec_def_for_operand (vinfo, mask,
7685 stmt_info);
7687 else if (modifier != NONE && (j & 1))
7689 if (modifier == WIDEN)
7692 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7693 vec_oprnd1);
7694 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7695 perm_mask, stmt_info, gsi);
7696 if (mask)
7697 mask_op
7698 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7699 vec_mask);
7701 else if (modifier == NARROW)
7703 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7704 perm_mask, stmt_info, gsi);
7705 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7706 vec_oprnd0);
7708 else
7709 gcc_unreachable ();
7711 else
7713 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7714 vec_oprnd1);
7715 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7716 vec_oprnd0);
7717 if (mask)
7718 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7719 vec_mask);
7722 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7724 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7725 TYPE_VECTOR_SUBPARTS (srctype)));
7726 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7727 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7728 gassign *new_stmt
7729 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7730 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7731 src = var;
7734 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7736 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7737 TYPE_VECTOR_SUBPARTS (idxtype)));
7738 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7739 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7740 gassign *new_stmt
7741 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7742 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7743 op = var;
7746 if (mask)
7748 tree utype;
7749 mask_arg = mask_op;
7750 if (modifier == NARROW)
7752 var = vect_get_new_ssa_name (mask_halfvectype,
7753 vect_simple_var);
7754 gassign *new_stmt
7755 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7756 : VEC_UNPACK_LO_EXPR,
7757 mask_op);
7758 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7759 mask_arg = var;
7761 tree optype = TREE_TYPE (mask_arg);
7762 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7763 utype = masktype;
7764 else
7765 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7766 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7767 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7768 gassign *new_stmt
7769 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7770 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7771 mask_arg = var;
7772 if (!useless_type_conversion_p (masktype, utype))
7774 gcc_assert (TYPE_PRECISION (utype)
7775 <= TYPE_PRECISION (masktype));
7776 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7777 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7778 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7779 mask_arg = var;
7783 gcall *new_stmt
7784 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7785 stmt_vec_info new_stmt_info
7786 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7788 if (prev_stmt_info == NULL)
7789 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7790 else
7791 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7792 prev_stmt_info = new_stmt_info;
7794 return true;
7796 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7797 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7799 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7800 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7802 if (grouped_store)
7804 /* FORNOW */
7805 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7807 /* We vectorize all the stmts of the interleaving group when we
7808 reach the last stmt in the group. */
7809 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7810 < DR_GROUP_SIZE (first_stmt_info)
7811 && !slp)
7813 *vec_stmt = NULL;
7814 return true;
7817 if (slp)
7819 grouped_store = false;
7820 /* VEC_NUM is the number of vect stmts to be created for this
7821 group. */
7822 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7823 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7824 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7825 == first_stmt_info);
7826 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7827 op = vect_get_store_rhs (first_stmt_info);
7829 else
7830 /* VEC_NUM is the number of vect stmts to be created for this
7831 group. */
7832 vec_num = group_size;
7834 ref_type = get_group_alias_ptr_type (first_stmt_info);
7836 else
7837 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7839 if (dump_enabled_p ())
7840 dump_printf_loc (MSG_NOTE, vect_location,
7841 "transform store. ncopies = %d\n", ncopies);
7843 if (memory_access_type == VMAT_ELEMENTWISE
7844 || memory_access_type == VMAT_STRIDED_SLP)
7846 gimple_stmt_iterator incr_gsi;
7847 bool insert_after;
7848 gimple *incr;
7849 tree offvar;
7850 tree ivstep;
7851 tree running_off;
7852 tree stride_base, stride_step, alias_off;
7853 tree vec_oprnd;
7854 tree dr_offset;
7855 unsigned int g;
7856 /* Checked by get_load_store_type. */
7857 unsigned int const_nunits = nunits.to_constant ();
7859 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7860 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7862 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7863 stride_base
7864 = fold_build_pointer_plus
7865 (DR_BASE_ADDRESS (first_dr_info->dr),
7866 size_binop (PLUS_EXPR,
7867 convert_to_ptrofftype (dr_offset),
7868 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7869 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7871 /* For a store with loop-invariant (but other than power-of-2)
7872 stride (i.e. not a grouped access) like so:
7874 for (i = 0; i < n; i += stride)
7875 array[i] = ...;
7877 we generate a new induction variable and new stores from
7878 the components of the (vectorized) rhs:
7880 for (j = 0; ; j += VF*stride)
7881 vectemp = ...;
7882 tmp1 = vectemp[0];
7883 array[j] = tmp1;
7884 tmp2 = vectemp[1];
7885 array[j + stride] = tmp2;
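     /* A concrete sketch of one such copy (illustrative numbers only,
	assuming const_nunits == 4 and no further unrolling, so VF == 4):

	   array[j]              = vectemp[0];
	   array[j + stride]     = vectemp[1];
	   array[j + 2 * stride] = vectemp[2];
	   array[j + 3 * stride] = vectemp[3];

	after which j advances by 4 * stride for the next vector copy.  */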
7889 unsigned nstores = const_nunits;
7890 unsigned lnel = 1;
7891 tree ltype = elem_type;
7892 tree lvectype = vectype;
7893 if (slp)
7895 if (group_size < const_nunits
7896 && const_nunits % group_size == 0)
7898 nstores = const_nunits / group_size;
7899 lnel = group_size;
7900 ltype = build_vector_type (elem_type, group_size);
7901 lvectype = vectype;
7903 /* First check whether the vec_extract optab fails to support direct
7904 extraction of the vector elements. */
7905 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7906 machine_mode vmode;
7907 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7908 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7909 group_size).exists (&vmode)
7910 || (convert_optab_handler (vec_extract_optab,
7911 TYPE_MODE (vectype), vmode)
7912 == CODE_FOR_nothing))
7914 /* Try to avoid emitting an extract of vector elements
7915 by performing the extracts using an integer type of the
7916 same size, extracting from a vector of those and then
7917 re-interpreting it as the original vector type if
7918 supported. */
7919 unsigned lsize
7920 = group_size * GET_MODE_BITSIZE (elmode);
7921 unsigned int lnunits = const_nunits / group_size;
7922 /* If we can't construct such a vector fall back to
7923 element extracts from the original vector type and
7924 element size stores. */
7925 if (int_mode_for_size (lsize, 0).exists (&elmode)
7926 && VECTOR_MODE_P (TYPE_MODE (vectype))
7927 && related_vector_mode (TYPE_MODE (vectype), elmode,
7928 lnunits).exists (&vmode)
7929 && (convert_optab_handler (vec_extract_optab,
7930 vmode, elmode)
7931 != CODE_FOR_nothing))
7933 nstores = lnunits;
7934 lnel = group_size;
7935 ltype = build_nonstandard_integer_type (lsize, 1);
7936 lvectype = build_vector_type (ltype, nstores);
7938 /* Else fall back to vector extraction anyway.
7939 Fewer stores are more important than avoiding spilling
7940 of the vector we extract from. Compared to the
7941 construction case in vectorizable_load no store-forwarding
7942 issue exists here for reasonable archs. */
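	      /* A worked instance of the integer-type fallback above
		 (illustrative numbers, not taken from the source): for a
		 group_size of 2 ints stored from a vector of 8 ints, lsize
		 is 64 and lnunits is 4, so the vector is view-converted to
		 a vector of 4 64-bit integers and each BIT_FIELD_REF
		 extract stores two adjacent ints with one 64-bit scalar
		 store.  */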
7945 else if (group_size >= const_nunits
7946 && group_size % const_nunits == 0)
7948 nstores = 1;
7949 lnel = const_nunits;
7950 ltype = vectype;
7951 lvectype = vectype;
7953 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7954 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7957 ivstep = stride_step;
7958 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7959 build_int_cst (TREE_TYPE (ivstep), vf));
7961 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7963 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7964 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7965 create_iv (stride_base, ivstep, NULL,
7966 loop, &incr_gsi, insert_after,
7967 &offvar, NULL);
7968 incr = gsi_stmt (incr_gsi);
7969 loop_vinfo->add_stmt (incr);
7971 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7973 prev_stmt_info = NULL;
7974 alias_off = build_int_cst (ref_type, 0);
7975 stmt_vec_info next_stmt_info = first_stmt_info;
7976 for (g = 0; g < group_size; g++)
7978 running_off = offvar;
7979 if (g)
7981 tree size = TYPE_SIZE_UNIT (ltype);
7982 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7983 size);
7984 tree newoff = copy_ssa_name (running_off, NULL);
7985 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7986 running_off, pos);
7987 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7988 running_off = newoff;
7990 unsigned int group_el = 0;
7991 unsigned HOST_WIDE_INT
7992 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7993 for (j = 0; j < ncopies; j++)
7995 /* We've set op and dt above, from vect_get_store_rhs,
7996 and first_stmt_info == stmt_info. */
7997 if (j == 0)
7999 if (slp)
8001 vect_get_vec_defs (vinfo, op, NULL_TREE, stmt_info,
8002 &vec_oprnds, NULL, slp_node);
8003 vec_oprnd = vec_oprnds[0];
8005 else
8007 op = vect_get_store_rhs (next_stmt_info);
8008 vec_oprnd = vect_get_vec_def_for_operand
8009 (vinfo, op, next_stmt_info);
8012 else
8014 if (slp)
8015 vec_oprnd = vec_oprnds[j];
8016 else
8017 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
8018 vec_oprnd);
8020 /* Pun the vector to extract from if necessary. */
8021 if (lvectype != vectype)
8023 tree tem = make_ssa_name (lvectype);
8024 gimple *pun
8025 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8026 lvectype, vec_oprnd));
8027 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8028 vec_oprnd = tem;
8030 for (i = 0; i < nstores; i++)
8032 tree newref, newoff;
8033 gimple *incr, *assign;
8034 tree size = TYPE_SIZE (ltype);
8035 /* Extract the i'th component. */
8036 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8037 bitsize_int (i), size);
8038 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8039 size, pos);
8041 elem = force_gimple_operand_gsi (gsi, elem, true,
8042 NULL_TREE, true,
8043 GSI_SAME_STMT);
8045 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8046 group_el * elsz);
8047 newref = build2 (MEM_REF, ltype,
8048 running_off, this_off);
8049 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8051 /* And store it to *running_off. */
8052 assign = gimple_build_assign (newref, elem);
8053 stmt_vec_info assign_info
8054 = vect_finish_stmt_generation (vinfo, stmt_info,
8055 assign, gsi);
8057 group_el += lnel;
8058 if (! slp
8059 || group_el == group_size)
8061 newoff = copy_ssa_name (running_off, NULL);
8062 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8063 running_off, stride_step);
8064 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8066 running_off = newoff;
8067 group_el = 0;
8069 if (g == group_size - 1
8070 && !slp)
8072 if (j == 0 && i == 0)
8073 STMT_VINFO_VEC_STMT (stmt_info)
8074 = *vec_stmt = assign_info;
8075 else
8076 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
8077 prev_stmt_info = assign_info;
8081 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8082 if (slp)
8083 break;
8086 vec_oprnds.release ();
8087 return true;
8090 auto_vec<tree> dr_chain (group_size);
8091 oprnds.create (group_size);
8093 /* Gather-scatter accesses perform only component accesses, alignment
8094 is irrelevant for them. */
8095 if (memory_access_type == VMAT_GATHER_SCATTER)
8096 alignment_support_scheme = dr_unaligned_supported;
8097 else
8098 alignment_support_scheme
8099 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
8101 gcc_assert (alignment_support_scheme);
8102 vec_loop_masks *loop_masks
8103 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8104 ? &LOOP_VINFO_MASKS (loop_vinfo)
8105 : NULL);
8106 /* Targets with store-lane instructions must not require explicit
8107 realignment. vect_supportable_dr_alignment always returns either
8108 dr_aligned or dr_unaligned_supported for masked operations. */
8109 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8110 && !mask
8111 && !loop_masks)
8112 || alignment_support_scheme == dr_aligned
8113 || alignment_support_scheme == dr_unaligned_supported);
8115 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
8116 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8117 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8119 tree bump;
8120 tree vec_offset = NULL_TREE;
8121 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8123 aggr_type = NULL_TREE;
8124 bump = NULL_TREE;
8126 else if (memory_access_type == VMAT_GATHER_SCATTER)
8128 aggr_type = elem_type;
8129 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8130 &bump, &vec_offset);
8132 else
8134 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8135 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8136 else
8137 aggr_type = vectype;
8138 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8139 memory_access_type);
8142 if (mask)
8143 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8145 /* In case the vectorization factor (VF) is bigger than the number
8146 of elements that we can fit in a vectype (nunits), we have to generate
8147 more than one vector stmt - i.e - we need to "unroll" the
8148 vector stmt by a factor VF/nunits. For more details see documentation in
8149 vect_get_vec_def_for_copy_stmt. */
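   /* For example (illustrative numbers only): with VF == 8 and a vectype
      holding nunits == 4 elements, ncopies == VF / nunits == 2 and the
      vector stores below are generated twice per vectorized iteration,
      the second copy using bumped data pointers.  */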
8151 /* In case of interleaving (non-unit grouped access):
8153 S1: &base + 2 = x2
8154 S2: &base = x0
8155 S3: &base + 1 = x1
8156 S4: &base + 3 = x3
8158 We create vectorized stores starting from the base address (the access of
8159 the first stmt in the chain, S2 in the above example) when the last store
8160 stmt of the chain (S4) is reached:
8162 VS1: &base = vx2
8163 VS2: &base + vec_size*1 = vx0
8164 VS3: &base + vec_size*2 = vx1
8165 VS4: &base + vec_size*3 = vx3
8167 Then permutation statements are generated:
8169 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8170 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8173 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8174 (the order of the data-refs in the output of vect_permute_store_chain
8175 corresponds to the order of scalar stmts in the interleaving chain - see
8176 the documentation of vect_permute_store_chain()).
8178 In case of both multiple types and interleaving, above vector stores and
8179 permutation stmts are created for every copy. The result vector stmts are
8180 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8181 STMT_VINFO_RELATED_STMT for the next copies.
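   /* A hedged source-level sketch only (array names are assumptions): a
      store group like S1..S4 above typically comes from a loop storing
      four adjacent elements per iteration, e.g.

	 for (int i = 0; i < n; i++)
	   {
	     out[4 * i + 2] = x2[i];
	     out[4 * i]     = x0[i];
	     out[4 * i + 1] = x1[i];
	     out[4 * i + 3] = x3[i];
	   }

      where the four stores form one interleaving chain with
      DR_GROUP_SIZE == 4 and the store to out[4 * i] (S2) as the first
      element of the group.  */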
8184 prev_stmt_info = NULL;
8185 tree vec_mask = NULL_TREE;
8186 for (j = 0; j < ncopies; j++)
8188 stmt_vec_info new_stmt_info;
8189 if (j == 0)
8191 if (slp)
8193 /* Get vectorized arguments for SLP_NODE. */
8194 vect_get_vec_defs (vinfo, op, NULL_TREE, stmt_info, &vec_oprnds,
8195 NULL, slp_node);
8197 vec_oprnd = vec_oprnds[0];
8199 else
8201 /* For interleaved stores we collect vectorized defs for all the
8202 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8203 used as an input to vect_permute_store_chain(), and OPRNDS as
8204 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8206 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8207 OPRNDS are of size 1. */
8208 stmt_vec_info next_stmt_info = first_stmt_info;
8209 for (i = 0; i < group_size; i++)
8211 /* Since gaps are not supported for interleaved stores,
8212 DR_GROUP_SIZE is the exact number of stmts in the chain.
8213 Therefore, NEXT_STMT_INFO can't be NULL. In case
8214 that there is no interleaving, DR_GROUP_SIZE is 1,
8215 and only one iteration of the loop will be executed. */
8216 op = vect_get_store_rhs (next_stmt_info);
8217 vec_oprnd = vect_get_vec_def_for_operand
8218 (vinfo, op, next_stmt_info);
8219 dr_chain.quick_push (vec_oprnd);
8220 oprnds.quick_push (vec_oprnd);
8221 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8223 if (mask)
8224 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info,
8225 mask_vectype);
8228 /* We should have caught mismatched types earlier. */
8229 gcc_assert (useless_type_conversion_p (vectype,
8230 TREE_TYPE (vec_oprnd)));
8231 bool simd_lane_access_p
8232 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8233 if (simd_lane_access_p
8234 && !loop_masks
8235 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8236 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8237 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8238 && integer_zerop (DR_INIT (first_dr_info->dr))
8239 && alias_sets_conflict_p (get_alias_set (aggr_type),
8240 get_alias_set (TREE_TYPE (ref_type))))
8242 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8243 dataref_offset = build_int_cst (ref_type, 0);
8245 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8246 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
8247 &dataref_ptr, &vec_offset);
8248 else
8249 dataref_ptr
8250 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8251 simd_lane_access_p ? loop : NULL,
8252 offset, &dummy, gsi, &ptr_incr,
8253 simd_lane_access_p, NULL_TREE, bump);
8255 else
8257 /* For interleaved stores we created vectorized defs for all the
8258 defs stored in OPRNDS in the previous iteration (previous copy).
8259 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8260 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8261 next copy.
8262 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8263 OPRNDS are of size 1. */
8264 for (i = 0; i < group_size; i++)
8266 op = oprnds[i];
8267 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8268 dr_chain[i] = vec_oprnd;
8269 oprnds[i] = vec_oprnd;
8271 if (mask)
8272 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8273 if (dataref_offset)
8274 dataref_offset
8275 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8276 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8277 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8278 else
8279 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8280 stmt_info, bump);
8283 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8285 tree vec_array;
8287 /* Get an array into which we can store the individual vectors. */
8288 vec_array = create_vector_array (vectype, vec_num);
8290 /* Invalidate the current contents of VEC_ARRAY. This should
8291 become an RTL clobber too, which prevents the vector registers
8292 from being upward-exposed. */
8293 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8295 /* Store the individual vectors into the array. */
8296 for (i = 0; i < vec_num; i++)
8298 vec_oprnd = dr_chain[i];
8299 write_vector_array (vinfo, stmt_info,
8300 gsi, vec_oprnd, vec_array, i);
8303 tree final_mask = NULL;
8304 if (loop_masks)
8305 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8306 vectype, j);
8307 if (vec_mask)
8308 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8309 vec_mask, gsi);
8311 gcall *call;
8312 if (final_mask)
8314 /* Emit:
8315 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8316 VEC_ARRAY). */
8317 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8318 tree alias_ptr = build_int_cst (ref_type, align);
8319 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8320 dataref_ptr, alias_ptr,
8321 final_mask, vec_array);
8323 else
8325 /* Emit:
8326 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8327 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8328 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8329 vec_array);
8330 gimple_call_set_lhs (call, data_ref);
8332 gimple_call_set_nothrow (call, true);
8333 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
8334 call, gsi);
8336 /* Record that VEC_ARRAY is now dead. */
8337 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8339 else
8341 new_stmt_info = NULL;
8342 if (grouped_store)
8344 if (j == 0)
8345 result_chain.create (group_size);
8346 /* Permute. */
8347 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8348 gsi, &result_chain);
8351 stmt_vec_info next_stmt_info = first_stmt_info;
8352 for (i = 0; i < vec_num; i++)
8354 unsigned misalign;
8355 unsigned HOST_WIDE_INT align;
8357 tree final_mask = NULL_TREE;
8358 if (loop_masks)
8359 final_mask = vect_get_loop_mask (gsi, loop_masks,
8360 vec_num * ncopies,
8361 vectype, vec_num * j + i);
8362 if (vec_mask)
8363 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8364 vec_mask, gsi);
8366 if (memory_access_type == VMAT_GATHER_SCATTER)
8368 tree scale = size_int (gs_info.scale);
8369 gcall *call;
8370 if (loop_masks)
8371 call = gimple_build_call_internal
8372 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8373 scale, vec_oprnd, final_mask);
8374 else
8375 call = gimple_build_call_internal
8376 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8377 scale, vec_oprnd);
8378 gimple_call_set_nothrow (call, true);
8379 new_stmt_info
8380 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8381 break;
8384 if (i > 0)
8385 /* Bump the vector pointer. */
8386 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8387 gsi, stmt_info, bump);
8389 if (slp)
8390 vec_oprnd = vec_oprnds[i];
8391 else if (grouped_store)
8392 /* For grouped stores vectorized defs are interleaved in
8393 vect_permute_store_chain(). */
8394 vec_oprnd = result_chain[i];
8396 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8397 if (aligned_access_p (first_dr_info))
8398 misalign = 0;
8399 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8401 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8402 misalign = 0;
8404 else
8405 misalign = DR_MISALIGNMENT (first_dr_info);
8406 if (dataref_offset == NULL_TREE
8407 && TREE_CODE (dataref_ptr) == SSA_NAME)
8408 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8409 misalign);
8411 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8413 tree perm_mask = perm_mask_for_reverse (vectype);
8414 tree perm_dest = vect_create_destination_var
8415 (vect_get_store_rhs (stmt_info), vectype);
8416 tree new_temp = make_ssa_name (perm_dest);
8418 /* Generate the permute statement. */
8419 gimple *perm_stmt
8420 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8421 vec_oprnd, perm_mask);
8422 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8424 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8425 vec_oprnd = new_temp;
8428 /* Arguments are ready. Create the new vector stmt. */
8429 if (final_mask)
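	      /* The alignment that can be guaranteed for this access is the
		 lowest set bit of MISALIGN | ALIGN; it is passed to the
		 masked store through the alias pointer argument below.  */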
8431 align = least_bit_hwi (misalign | align);
8432 tree ptr = build_int_cst (ref_type, align);
8433 gcall *call
8434 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8435 dataref_ptr, ptr,
8436 final_mask, vec_oprnd);
8437 gimple_call_set_nothrow (call, true);
8438 new_stmt_info
8439 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8441 else
8443 data_ref = fold_build2 (MEM_REF, vectype,
8444 dataref_ptr,
8445 dataref_offset
8446 ? dataref_offset
8447 : build_int_cst (ref_type, 0));
8448 if (aligned_access_p (first_dr_info))
8450 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8451 TREE_TYPE (data_ref)
8452 = build_aligned_type (TREE_TYPE (data_ref),
8453 align * BITS_PER_UNIT);
8454 else
8455 TREE_TYPE (data_ref)
8456 = build_aligned_type (TREE_TYPE (data_ref),
8457 TYPE_ALIGN (elem_type));
8458 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8459 gassign *new_stmt
8460 = gimple_build_assign (data_ref, vec_oprnd);
8461 new_stmt_info
8462 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8465 if (slp)
8466 continue;
8468 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8469 if (!next_stmt_info)
8470 break;
8473 if (!slp)
8475 if (j == 0)
8476 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8477 else
8478 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8479 prev_stmt_info = new_stmt_info;
8483 oprnds.release ();
8484 result_chain.release ();
8485 vec_oprnds.release ();
8487 return true;
8490 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8491 VECTOR_CST mask. No checks are made that the target platform supports the
8492 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8493 vect_gen_perm_mask_checked. */
8495 tree
8496 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8498 tree mask_type;
8500 poly_uint64 nunits = sel.length ();
8501 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8503 mask_type = build_vector_type (ssizetype, nunits);
8504 return vec_perm_indices_to_tree (mask_type, sel);
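/* Usage sketch (hedged; VECTYPE below stands for any 8-lane vector type and
   is an assumption for illustration): to build a mask that reverses an
   8-element vector, mirroring what perm_mask_for_reverse does, one would do

     vec_perm_builder sel (8, 8, 1);
     for (unsigned int i = 0; i < 8; ++i)
       sel.quick_push (7 - i);
     vec_perm_indices indices (sel, 1, 8);
     tree mask = vect_gen_perm_mask_any (vectype, indices);

   and would call vect_gen_perm_mask_checked instead once the target's
   support for the permutation has been verified.  */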
8507 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8508 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8510 tree
8511 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8513 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8514 return vect_gen_perm_mask_any (vectype, sel);
8517 /* Given vector variables X and Y that were generated for the scalar
8518 STMT_INFO, generate instructions to permute the vector elements of X and Y
8519 using permutation mask MASK_VEC, insert them at *GSI and return the
8520 permuted vector variable. */
8522 static tree
8523 permute_vec_elements (vec_info *vinfo,
8524 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8525 gimple_stmt_iterator *gsi)
8527 tree vectype = TREE_TYPE (x);
8528 tree perm_dest, data_ref;
8529 gimple *perm_stmt;
8531 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8532 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8533 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8534 else
8535 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8536 data_ref = make_ssa_name (perm_dest);
8538 /* Generate the permute statement. */
8539 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8540 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8542 return data_ref;
8545 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8546 inserting them on the loop's preheader edge. Returns true if we
8547 were successful in doing so (and thus STMT_INFO can be moved then),
8548 otherwise returns false. */
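/* A hedged illustration (the loop and names are assumptions): for an
   invariant load like _2 = a[_1] in

     for (i = 0; i < n; i++)
       x[i] = a[m - 1];

   the in-loop statement _1 = m - 1 defining the index is the kind of def
   this function moves to the preheader, after which the caller can emit the
   invariant load itself on the preheader edge as well.  */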
8550 static bool
8551 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8553 ssa_op_iter i;
8554 tree op;
8555 bool any = false;
8557 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8559 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8560 if (!gimple_nop_p (def_stmt)
8561 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8563 /* Make sure we don't need to recurse. While we could do
8564 so in simple cases, when there are more complex use webs
8565 we don't have an easy way to preserve stmt order to fulfil
8566 dependencies within them. */
8567 tree op2;
8568 ssa_op_iter i2;
8569 if (gimple_code (def_stmt) == GIMPLE_PHI)
8570 return false;
8571 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8573 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8574 if (!gimple_nop_p (def_stmt2)
8575 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8576 return false;
8578 any = true;
8582 if (!any)
8583 return true;
8585 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8587 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8588 if (!gimple_nop_p (def_stmt)
8589 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8591 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8592 gsi_remove (&gsi, false);
8593 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8597 return true;
8600 /* vectorizable_load.
8602 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8603 that can be vectorized.
8604 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8605 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8606 Return true if STMT_INFO is vectorizable in this way. */
8608 static bool
8609 vectorizable_load (vec_info *vinfo,
8610 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8611 stmt_vec_info *vec_stmt, slp_tree slp_node,
8612 slp_instance slp_node_instance,
8613 stmt_vector_for_cost *cost_vec)
8615 tree scalar_dest;
8616 tree vec_dest = NULL;
8617 tree data_ref = NULL;
8618 stmt_vec_info prev_stmt_info;
8619 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8620 class loop *loop = NULL;
8621 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8622 bool nested_in_vect_loop = false;
8623 tree elem_type;
8624 tree new_temp;
8625 machine_mode mode;
8626 tree dummy;
8627 enum dr_alignment_support alignment_support_scheme;
8628 tree dataref_ptr = NULL_TREE;
8629 tree dataref_offset = NULL_TREE;
8630 gimple *ptr_incr = NULL;
8631 int ncopies;
8632 int i, j;
8633 unsigned int group_size;
8634 poly_uint64 group_gap_adj;
8635 tree msq = NULL_TREE, lsq;
8636 tree offset = NULL_TREE;
8637 tree byte_offset = NULL_TREE;
8638 tree realignment_token = NULL_TREE;
8639 gphi *phi = NULL;
8640 vec<tree> dr_chain = vNULL;
8641 bool grouped_load = false;
8642 stmt_vec_info first_stmt_info;
8643 stmt_vec_info first_stmt_info_for_drptr = NULL;
8644 bool compute_in_loop = false;
8645 class loop *at_loop;
8646 int vec_num;
8647 bool slp = (slp_node != NULL);
8648 bool slp_perm = false;
8649 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8650 poly_uint64 vf;
8651 tree aggr_type;
8652 gather_scatter_info gs_info;
8653 tree ref_type;
8654 enum vect_def_type mask_dt = vect_unknown_def_type;
8656 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8657 return false;
8659 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8660 && ! vec_stmt)
8661 return false;
8663 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8664 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8666 scalar_dest = gimple_assign_lhs (assign);
8667 if (TREE_CODE (scalar_dest) != SSA_NAME)
8668 return false;
8670 tree_code code = gimple_assign_rhs_code (assign);
8671 if (code != ARRAY_REF
8672 && code != BIT_FIELD_REF
8673 && code != INDIRECT_REF
8674 && code != COMPONENT_REF
8675 && code != IMAGPART_EXPR
8676 && code != REALPART_EXPR
8677 && code != MEM_REF
8678 && TREE_CODE_CLASS (code) != tcc_declaration)
8679 return false;
8681 else
8683 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8684 if (!call || !gimple_call_internal_p (call))
8685 return false;
8687 internal_fn ifn = gimple_call_internal_fn (call);
8688 if (!internal_load_fn_p (ifn))
8689 return false;
8691 scalar_dest = gimple_call_lhs (call);
8692 if (!scalar_dest)
8693 return false;
8695 int mask_index = internal_fn_mask_index (ifn);
8696 if (mask_index >= 0)
8698 mask = gimple_call_arg (call, mask_index);
8699 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8700 &mask_vectype))
8701 return false;
8705 if (!STMT_VINFO_DATA_REF (stmt_info))
8706 return false;
8708 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8709 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8711 if (loop_vinfo)
8713 loop = LOOP_VINFO_LOOP (loop_vinfo);
8714 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8715 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8717 else
8718 vf = 1;
8720 /* Multiple types in SLP are handled by creating the appropriate number of
8721 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8722 case of SLP. */
8723 if (slp)
8724 ncopies = 1;
8725 else
8726 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8728 gcc_assert (ncopies >= 1);
8730 /* FORNOW. This restriction should be relaxed. */
8731 if (nested_in_vect_loop && ncopies > 1)
8733 if (dump_enabled_p ())
8734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8735 "multiple types in nested loop.\n");
8736 return false;
8739 /* Invalidate assumptions made by dependence analysis when vectorization
8740 on the unrolled body effectively re-orders stmts. */
8741 if (ncopies > 1
8742 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8743 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8744 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8746 if (dump_enabled_p ())
8747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8748 "cannot perform implicit CSE when unrolling "
8749 "with negative dependence distance\n");
8750 return false;
8753 elem_type = TREE_TYPE (vectype);
8754 mode = TYPE_MODE (vectype);
8756 /* FORNOW. In some cases can vectorize even if data-type not supported
8757 (e.g. - data copies). */
8758 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8760 if (dump_enabled_p ())
8761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8762 "Aligned load, but unsupported type.\n");
8763 return false;
8766 /* Check if the load is a part of an interleaving chain. */
8767 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8769 grouped_load = true;
8770 /* FORNOW */
8771 gcc_assert (!nested_in_vect_loop);
8772 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8774 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8775 group_size = DR_GROUP_SIZE (first_stmt_info);
8777 /* Refuse non-SLP vectorization of SLP-only groups. */
8778 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8780 if (dump_enabled_p ())
8781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8782 "cannot vectorize load in non-SLP mode.\n");
8783 return false;
8786 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8787 slp_perm = true;
8789 /* Invalidate assumptions made by dependence analysis when vectorization
8790 on the unrolled body effectively re-orders stmts. */
8791 if (!PURE_SLP_STMT (stmt_info)
8792 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8793 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8794 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8796 if (dump_enabled_p ())
8797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8798 "cannot perform implicit CSE when performing "
8799 "group loads with negative dependence distance\n");
8800 return false;
8803 else
8804 group_size = 1;
8806 vect_memory_access_type memory_access_type;
8807 if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, VLS_LOAD,
8808 ncopies, &memory_access_type, &gs_info))
8809 return false;
8811 if (mask)
8813 if (memory_access_type == VMAT_CONTIGUOUS)
8815 machine_mode vec_mode = TYPE_MODE (vectype);
8816 if (!VECTOR_MODE_P (vec_mode)
8817 || !can_vec_mask_load_store_p (vec_mode,
8818 TYPE_MODE (mask_vectype), true))
8819 return false;
8821 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8822 && memory_access_type != VMAT_GATHER_SCATTER)
8824 if (dump_enabled_p ())
8825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8826 "unsupported access type for masked load.\n");
8827 return false;
8831 if (!vec_stmt) /* transformation not required. */
8833 if (!slp)
8834 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8836 if (loop_vinfo
8837 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8838 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8839 memory_access_type, &gs_info, mask);
8841 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8842 vect_model_load_cost (vinfo, stmt_info, ncopies, memory_access_type,
8843 slp_node_instance, slp_node, cost_vec);
8844 return true;
8847 if (!slp)
8848 gcc_assert (memory_access_type
8849 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8851 if (dump_enabled_p ())
8852 dump_printf_loc (MSG_NOTE, vect_location,
8853 "transform load. ncopies = %d\n", ncopies);
8855 /* Transform. */
8857 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8858 ensure_base_align (dr_info);
8860 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8862 vect_build_gather_load_calls (vinfo,
8863 stmt_info, gsi, vec_stmt, &gs_info, mask);
8864 return true;
8867 if (memory_access_type == VMAT_INVARIANT)
8869 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8870 /* If we have versioned for aliasing or the loop doesn't
8871 have any data dependencies that would preclude this,
8872 then we are sure this is a loop invariant load and
8873 thus we can insert it on the preheader edge. */
8874 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8875 && !nested_in_vect_loop
8876 && hoist_defs_of_uses (stmt_info, loop));
8877 if (hoist_p)
8879 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8880 if (dump_enabled_p ())
8881 dump_printf_loc (MSG_NOTE, vect_location,
8882 "hoisting out of the vectorized loop: %G", stmt);
8883 scalar_dest = copy_ssa_name (scalar_dest);
8884 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8885 gsi_insert_on_edge_immediate
8886 (loop_preheader_edge (loop),
8887 gimple_build_assign (scalar_dest, rhs));
8889 /* These copies are all equivalent, but currently the representation
8890 requires a separate STMT_VINFO_VEC_STMT for each one. */
8891 prev_stmt_info = NULL;
8892 gimple_stmt_iterator gsi2 = *gsi;
8893 gsi_next (&gsi2);
8894 for (j = 0; j < ncopies; j++)
8896 stmt_vec_info new_stmt_info;
8897 if (hoist_p)
8899 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8900 vectype, NULL);
8901 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8902 new_stmt_info = vinfo->add_stmt (new_stmt);
8904 else
8906 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8907 vectype, &gsi2);
8908 new_stmt_info = vinfo->lookup_def (new_temp);
8910 if (slp)
8911 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8912 else if (j == 0)
8913 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8914 else
8915 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8916 prev_stmt_info = new_stmt_info;
8918 return true;
8921 if (memory_access_type == VMAT_ELEMENTWISE
8922 || memory_access_type == VMAT_STRIDED_SLP)
8924 gimple_stmt_iterator incr_gsi;
8925 bool insert_after;
8926 gimple *incr;
8927 tree offvar;
8928 tree ivstep;
8929 tree running_off;
8930 vec<constructor_elt, va_gc> *v = NULL;
8931 tree stride_base, stride_step, alias_off;
8932 /* Checked by get_load_store_type. */
8933 unsigned int const_nunits = nunits.to_constant ();
8934 unsigned HOST_WIDE_INT cst_offset = 0;
8935 tree dr_offset;
8937 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8938 gcc_assert (!nested_in_vect_loop);
8940 if (grouped_load)
8942 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8943 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8945 else
8947 first_stmt_info = stmt_info;
8948 first_dr_info = dr_info;
8950 if (slp && grouped_load)
8952 group_size = DR_GROUP_SIZE (first_stmt_info);
8953 ref_type = get_group_alias_ptr_type (first_stmt_info);
8955 else
8957 if (grouped_load)
8958 cst_offset
8959 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8960 * vect_get_place_in_interleaving_chain (stmt_info,
8961 first_stmt_info));
8962 group_size = 1;
8963 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8966 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8967 stride_base
8968 = fold_build_pointer_plus
8969 (DR_BASE_ADDRESS (first_dr_info->dr),
8970 size_binop (PLUS_EXPR,
8971 convert_to_ptrofftype (dr_offset),
8972 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8973 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8975 /* For a load with loop-invariant (but other than power-of-2)
8976 stride (i.e. not a grouped access) like so:
8978 for (i = 0; i < n; i += stride)
8979 ... = array[i];
8981 we generate a new induction variable and new accesses to
8982 form a new vector (or vectors, depending on ncopies):
8984 for (j = 0; ; j += VF*stride)
8985 tmp1 = array[j];
8986 tmp2 = array[j + stride];
8988 vectemp = {tmp1, tmp2, ...}
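For instance, a minimal sketch assuming stride == 3 and VF == 4
(illustrative values only): ivstep below becomes 4 * 3 == 12 and the
generated accesses are

for (j = 0; ; j += 12)
tmp1 = array[j];
tmp2 = array[j + 3];
tmp3 = array[j + 6];
tmp4 = array[j + 9];
vectemp = {tmp1, tmp2, tmp3, tmp4}  */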
8991 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8992 build_int_cst (TREE_TYPE (stride_step), vf));
8994 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8996 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8997 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8998 create_iv (stride_base, ivstep, NULL,
8999 loop, &incr_gsi, insert_after,
9000 &offvar, NULL);
9001 incr = gsi_stmt (incr_gsi);
9002 loop_vinfo->add_stmt (incr);
9004 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9006 prev_stmt_info = NULL;
9007 running_off = offvar;
9008 alias_off = build_int_cst (ref_type, 0);
9009 int nloads = const_nunits;
9010 int lnel = 1;
9011 tree ltype = TREE_TYPE (vectype);
9012 tree lvectype = vectype;
9013 auto_vec<tree> dr_chain;
9014 if (memory_access_type == VMAT_STRIDED_SLP)
9016 if (group_size < const_nunits)
9018 /* First check if vec_init optab supports construction from vector
9019 elts directly. Otherwise avoid emitting a constructor of
9020 vector elements by performing the loads using an integer type
9021 of the same size, constructing a vector of those and then
9022 re-interpreting it as the original vector type. This avoids a
9023 huge runtime penalty due to the general inability to perform
9024 store forwarding from smaller stores to a larger load. */
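/* For example, a minimal sketch assuming a V4SI vectype and
group_size == 2 (illustrative values only): each vector is assembled
from two half-width loads, either as V2SI pieces if the target can
build a V4SI from them directly, or as 64-bit integer loads that are
combined into a V2DI and then VIEW_CONVERTed back to V4SI; in both
cases nloads == 2 and lnel == 2 below.  */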
9025 tree ptype;
9026 tree vtype
9027 = vector_vector_composition_type (vectype,
9028 const_nunits / group_size,
9029 &ptype);
9030 if (vtype != NULL_TREE)
9032 nloads = const_nunits / group_size;
9033 lnel = group_size;
9034 lvectype = vtype;
9035 ltype = ptype;
9038 else
9040 nloads = 1;
9041 lnel = const_nunits;
9042 ltype = vectype;
9044 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9046 /* Load vector(1) scalar_type if the vectype is a single-element vector. */
9047 else if (nloads == 1)
9048 ltype = vectype;
9050 if (slp)
9052 /* For SLP permutation support we need to load the whole group,
9053 not only the number of vector stmts the permutation result
9054 fits in. */
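/* For instance, with group_size == 3, VF == 4 and a 4-element
vectype (illustrative values), ncopies below becomes
CEIL (3 * 4, 4) == 3, i.e. we build three vectors covering the
whole group times VF, even if the permutation result itself would
fit in fewer vector stmts.  */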
9055 if (slp_perm)
9057 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9058 variable VF. */
9059 unsigned int const_vf = vf.to_constant ();
9060 ncopies = CEIL (group_size * const_vf, const_nunits);
9061 dr_chain.create (ncopies);
9063 else
9064 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9066 unsigned int group_el = 0;
9067 unsigned HOST_WIDE_INT
9068 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9069 for (j = 0; j < ncopies; j++)
9071 if (nloads > 1)
9072 vec_alloc (v, nloads);
9073 stmt_vec_info new_stmt_info = NULL;
9074 for (i = 0; i < nloads; i++)
9076 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9077 group_el * elsz + cst_offset);
9078 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9079 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9080 gassign *new_stmt
9081 = gimple_build_assign (make_ssa_name (ltype), data_ref);
9082 new_stmt_info
9083 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9084 if (nloads > 1)
9085 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9086 gimple_assign_lhs (new_stmt));
9088 group_el += lnel;
9089 if (! slp
9090 || group_el == group_size)
9092 tree newoff = copy_ssa_name (running_off);
9093 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9094 running_off, stride_step);
9095 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9097 running_off = newoff;
9098 group_el = 0;
9101 if (nloads > 1)
9103 tree vec_inv = build_constructor (lvectype, v);
9104 new_temp = vect_init_vector (vinfo, stmt_info,
9105 vec_inv, lvectype, gsi);
9106 new_stmt_info = vinfo->lookup_def (new_temp);
9107 if (lvectype != vectype)
9109 gassign *new_stmt
9110 = gimple_build_assign (make_ssa_name (vectype),
9111 VIEW_CONVERT_EXPR,
9112 build1 (VIEW_CONVERT_EXPR,
9113 vectype, new_temp));
9114 new_stmt_info
9115 = vect_finish_stmt_generation (vinfo, stmt_info,
9116 new_stmt, gsi);
9120 if (slp)
9122 if (slp_perm)
9123 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9124 else
9125 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9127 else
9129 if (j == 0)
9130 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9131 else
9132 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9133 prev_stmt_info = new_stmt_info;
9136 if (slp_perm)
9138 unsigned n_perms;
9139 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9140 slp_node_instance, false, &n_perms);
9142 return true;
9145 if (memory_access_type == VMAT_GATHER_SCATTER
9146 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9147 grouped_load = false;
9149 if (grouped_load)
9151 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9152 group_size = DR_GROUP_SIZE (first_stmt_info);
9153 /* For SLP vectorization we directly vectorize a subchain
9154 without permutation. */
9155 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9156 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9157 /* For BB vectorization always use the first stmt to base
9158 the data ref pointer on. */
9159 if (bb_vinfo)
9160 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9162 /* Check if the chain of loads is already vectorized. */
9163 if (STMT_VINFO_VEC_STMT (first_stmt_info)
9164 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9165 ??? But we can only do so if there is exactly one
9166 as we have no way to get at the rest. Leave the CSE
9167 opportunity alone.
9168 ??? With the group load eventually participating
9169 in multiple different permutations (having multiple
9170 slp nodes which refer to the same group) the CSE
9171 is even wrong code. See PR56270. */
9172 && !slp)
9174 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9175 return true;
9177 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9178 group_gap_adj = 0;
9180 /* VEC_NUM is the number of vect stmts to be created for this group. */
9181 if (slp)
9183 grouped_load = false;
9184 /* If an SLP permutation is from N elements to N elements,
9185 and if one vector holds a whole number of N, we can load
9186 the inputs to the permutation in the same way as an
9187 unpermuted sequence. In other cases we need to load the
9188 whole group, not only the number of vector stmts the
9189 permutation result fits in. */
9190 if (slp_perm
9191 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
9192 || !multiple_p (nunits, group_size)))
9194 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9195 variable VF; see vect_transform_slp_perm_load. */
9196 unsigned int const_vf = vf.to_constant ();
9197 unsigned int const_nunits = nunits.to_constant ();
9198 vec_num = CEIL (group_size * const_vf, const_nunits);
9199 group_gap_adj = vf * group_size - nunits * vec_num;
9201 else
9203 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9204 group_gap_adj
9205 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
9208 else
9209 vec_num = group_size;
9211 ref_type = get_group_alias_ptr_type (first_stmt_info);
9213 else
9215 first_stmt_info = stmt_info;
9216 first_dr_info = dr_info;
9217 group_size = vec_num = 1;
9218 group_gap_adj = 0;
9219 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9222 /* Gather-scatter accesses perform only component accesses, alignment
9223 is irrelevant for them. */
9224 if (memory_access_type == VMAT_GATHER_SCATTER)
9225 alignment_support_scheme = dr_unaligned_supported;
9226 else
9227 alignment_support_scheme
9228 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
9230 gcc_assert (alignment_support_scheme);
9231 vec_loop_masks *loop_masks
9232 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9233 ? &LOOP_VINFO_MASKS (loop_vinfo)
9234 : NULL);
9235 /* Targets with store-lane instructions must not require explicit
9236 realignment. vect_supportable_dr_alignment always returns either
9237 dr_aligned or dr_unaligned_supported for masked operations. */
9238 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9239 && !mask
9240 && !loop_masks)
9241 || alignment_support_scheme == dr_aligned
9242 || alignment_support_scheme == dr_unaligned_supported);
9244 /* In case the vectorization factor (VF) is bigger than the number
9245 of elements that we can fit in a vectype (nunits), we have to generate
9246 more than one vector stmt - i.e - we need to "unroll" the
9247 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9248 from one copy of the vector stmt to the next, in the field
9249 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9250 stages to find the correct vector defs to be used when vectorizing
9251 stmts that use the defs of the current stmt. The example below
9252 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9253 need to create 4 vectorized stmts):
9255 before vectorization:
9256 RELATED_STMT VEC_STMT
9257 S1: x = memref - -
9258 S2: z = x + 1 - -
9260 step 1: vectorize stmt S1:
9261 We first create the vector stmt VS1_0, and, as usual, record a
9262 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9263 Next, we create the vector stmt VS1_1, and record a pointer to
9264 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9265 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9266 stmts and pointers:
9267 RELATED_STMT VEC_STMT
9268 VS1_0: vx0 = memref0 VS1_1 -
9269 VS1_1: vx1 = memref1 VS1_2 -
9270 VS1_2: vx2 = memref2 VS1_3 -
9271 VS1_3: vx3 = memref3 - -
9272 S1: x = load - VS1_0
9273 S2: z = x + 1 - -
9275 See the documentation of vect_get_vec_def_for_stmt_copy for how the
9276 information we recorded in the RELATED_STMT field is used to vectorize
9277 stmt S2. */
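/* A minimal sketch of step 2 under the same assumptions (VF=16,
nunits=4): when vectorizing S2 we create one copy per vector load,

VS2_0: vz0 = vx0 + v1
VS2_1: vz1 = vx1 + v1
VS2_2: vz2 = vx2 + v1
VS2_3: vz3 = vx3 + v1

where vx1, vx2 and vx3 are found by following the RELATED_STMT links
starting at VS1_0.  (The vz names and the constant vector v1 are
illustrative only.)  */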
9279 /* In case of interleaving (non-unit grouped access):
9281 S1: x2 = &base + 2
9282 S2: x0 = &base
9283 S3: x1 = &base + 1
9284 S4: x3 = &base + 3
9286 Vectorized loads are created in the order of memory accesses
9287 starting from the access of the first stmt of the chain:
9289 VS1: vx0 = &base
9290 VS2: vx1 = &base + vec_size*1
9291 VS3: vx3 = &base + vec_size*2
9292 VS4: vx4 = &base + vec_size*3
9294 Then permutation statements are generated:
9296 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9297 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9300 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9301 (the order of the data-refs in the output of vect_permute_load_chain
9302 corresponds to the order of scalar stmts in the interleaving chain - see
9303 the documentation of vect_permute_load_chain()).
9304 The generation of permutation stmts and recording them in
9305 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9307 In case of both multiple types and interleaving, the vector loads and
9308 permutation stmts above are created for every copy. The result vector
9309 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9310 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
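/* As a concrete instance (illustrative only): for a group of two
loads and 4-element vectors, the permutation masks above are
{ 0, 2, 4, 6 } for the even elements and { 1, 3, 5, 7 } for the odd
elements, i.e. an extract-even / extract-odd of the pair vx0, vx1.  */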
9312 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9313 on a target that supports unaligned accesses (dr_unaligned_supported)
9314 we generate the following code:
9315 p = initial_addr;
9316 indx = 0;
9317 loop {
9318 p = p + indx * vectype_size;
9319 vec_dest = *(p);
9320 indx = indx + 1;
9323 Otherwise, the data reference is potentially unaligned on a target that
9324 does not support unaligned accesses (dr_explicit_realign_optimized) -
9325 then generate the following code, in which the data in each iteration is
9326 obtained by two vector loads, one from the previous iteration, and one
9327 from the current iteration:
9328 p1 = initial_addr;
9329 msq_init = *(floor(p1))
9330 p2 = initial_addr + VS - 1;
9331 realignment_token = call target_builtin;
9332 indx = 0;
9333 loop {
9334 p2 = p2 + indx * vectype_size
9335 lsq = *(floor(p2))
9336 vec_dest = realign_load (msq, lsq, realignment_token)
9337 indx = indx + 1;
9338 msq = lsq;
9339 } */
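/* Roughly, a minimal sketch assuming 16-byte vectors and an address
that is 4 bytes past an aligned boundary: msq covers bytes
[addr-4, addr+12), lsq covers bytes [addr+12, addr+28), and
realign_load selects the 16 bytes starting at addr out of the two
aligned loads, as directed by realignment_token.  */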
9341 /* If the misalignment remains the same throughout the execution of the
9342 loop, we can create the init_addr and permutation mask at the loop
9343 preheader. Otherwise, they need to be created inside the loop.
9344 This can only occur when vectorizing memory accesses in the inner-loop
9345 nested within an outer-loop that is being vectorized. */
9347 if (nested_in_vect_loop
9348 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9349 GET_MODE_SIZE (TYPE_MODE (vectype))))
9351 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9352 compute_in_loop = true;
9355 bool diff_first_stmt_info
9356 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9358 if ((alignment_support_scheme == dr_explicit_realign_optimized
9359 || alignment_support_scheme == dr_explicit_realign)
9360 && !compute_in_loop)
9362 /* If we have a different first_stmt_info, we can't set up realignment
9363 here, since we can't guarantee that the first_stmt_info DR has been
9364 initialized yet; instead use the first_stmt_info_for_drptr DR, bumping
9365 by the distance from the first_stmt_info DR as below.  */
9366 if (!diff_first_stmt_info)
9367 msq = vect_setup_realignment (loop_vinfo,
9368 first_stmt_info, gsi, &realignment_token,
9369 alignment_support_scheme, NULL_TREE,
9370 &at_loop);
9371 if (alignment_support_scheme == dr_explicit_realign_optimized)
9373 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9374 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9375 size_one_node);
9376 gcc_assert (!first_stmt_info_for_drptr);
9379 else
9380 at_loop = loop;
9382 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9383 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9385 tree bump;
9386 tree vec_offset = NULL_TREE;
9387 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9389 aggr_type = NULL_TREE;
9390 bump = NULL_TREE;
9392 else if (memory_access_type == VMAT_GATHER_SCATTER)
9394 aggr_type = elem_type;
9395 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9396 &bump, &vec_offset);
9398 else
9400 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9401 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9402 else
9403 aggr_type = vectype;
9404 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9405 memory_access_type);
9408 tree vec_mask = NULL_TREE;
9409 prev_stmt_info = NULL;
9410 poly_uint64 group_elt = 0;
9411 for (j = 0; j < ncopies; j++)
9413 stmt_vec_info new_stmt_info = NULL;
9414 /* 1. Create the vector or array pointer update chain. */
9415 if (j == 0)
9417 bool simd_lane_access_p
9418 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9419 if (simd_lane_access_p
9420 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9421 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9422 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9423 && integer_zerop (DR_INIT (first_dr_info->dr))
9424 && alias_sets_conflict_p (get_alias_set (aggr_type),
9425 get_alias_set (TREE_TYPE (ref_type)))
9426 && (alignment_support_scheme == dr_aligned
9427 || alignment_support_scheme == dr_unaligned_supported))
9429 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9430 dataref_offset = build_int_cst (ref_type, 0);
9432 else if (diff_first_stmt_info)
9434 dataref_ptr
9435 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9436 aggr_type, at_loop, offset, &dummy,
9437 gsi, &ptr_incr, simd_lane_access_p,
9438 byte_offset, bump);
9439 /* Adjust the pointer by the difference to first_stmt. */
9440 data_reference_p ptrdr
9441 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9442 tree diff
9443 = fold_convert (sizetype,
9444 size_binop (MINUS_EXPR,
9445 DR_INIT (first_dr_info->dr),
9446 DR_INIT (ptrdr)));
9447 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9448 stmt_info, diff);
9449 if (alignment_support_scheme == dr_explicit_realign)
9451 msq = vect_setup_realignment (vinfo,
9452 first_stmt_info_for_drptr, gsi,
9453 &realignment_token,
9454 alignment_support_scheme,
9455 dataref_ptr, &at_loop);
9456 gcc_assert (!compute_in_loop);
9459 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9460 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9461 &dataref_ptr, &vec_offset);
9462 else
9463 dataref_ptr
9464 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9465 at_loop,
9466 offset, &dummy, gsi, &ptr_incr,
9467 simd_lane_access_p,
9468 byte_offset, bump);
9469 if (mask)
9471 if (slp_node)
9473 auto_vec<vec<tree> > vec_defs (1);
9474 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
9475 vec_mask = vec_defs[0][0];
9477 else
9478 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info,
9479 mask_vectype);
9482 else
9484 if (dataref_offset)
9485 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9486 bump);
9487 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9488 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9489 else
9490 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9491 stmt_info, bump);
9492 if (mask)
9493 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9496 if (grouped_load || slp_perm)
9497 dr_chain.create (vec_num);
9499 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9501 tree vec_array;
9503 vec_array = create_vector_array (vectype, vec_num);
9505 tree final_mask = NULL_TREE;
9506 if (loop_masks)
9507 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9508 vectype, j);
9509 if (vec_mask)
9510 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9511 vec_mask, gsi);
9513 gcall *call;
9514 if (final_mask)
9516 /* Emit:
9517 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9518 VEC_MASK). */
9519 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9520 tree alias_ptr = build_int_cst (ref_type, align);
9521 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9522 dataref_ptr, alias_ptr,
9523 final_mask);
9525 else
9527 /* Emit:
9528 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
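/* E.g., a sketch assuming a group of three int loads and a V4SI
vectype (illustrative values): LOAD_LANES reads 12 consecutive
ints and de-interleaves them, so vector k of VEC_ARRAY holds
elements k, k+3, k+6 and k+9 of the memory block.  */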
9529 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9530 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9532 gimple_call_set_lhs (call, vec_array);
9533 gimple_call_set_nothrow (call, true);
9534 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
9535 call, gsi);
9537 /* Extract each vector into an SSA_NAME. */
9538 for (i = 0; i < vec_num; i++)
9540 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9541 vec_array, i);
9542 dr_chain.quick_push (new_temp);
9545 /* Record the mapping between SSA_NAMEs and statements. */
9546 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9548 /* Record that VEC_ARRAY is now dead. */
9549 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9551 else
9553 for (i = 0; i < vec_num; i++)
9555 tree final_mask = NULL_TREE;
9556 if (loop_masks
9557 && memory_access_type != VMAT_INVARIANT)
9558 final_mask = vect_get_loop_mask (gsi, loop_masks,
9559 vec_num * ncopies,
9560 vectype, vec_num * j + i);
9561 if (vec_mask)
9562 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9563 vec_mask, gsi);
9565 if (i > 0)
9566 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9567 gsi, stmt_info, bump);
9569 /* 2. Create the vector-load in the loop. */
9570 gimple *new_stmt = NULL;
9571 switch (alignment_support_scheme)
9573 case dr_aligned:
9574 case dr_unaligned_supported:
9576 unsigned int misalign;
9577 unsigned HOST_WIDE_INT align;
9579 if (memory_access_type == VMAT_GATHER_SCATTER)
9581 tree zero = build_zero_cst (vectype);
9582 tree scale = size_int (gs_info.scale);
9583 gcall *call;
9584 if (loop_masks)
9585 call = gimple_build_call_internal
9586 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9587 vec_offset, scale, zero, final_mask);
9588 else
9589 call = gimple_build_call_internal
9590 (IFN_GATHER_LOAD, 4, dataref_ptr,
9591 vec_offset, scale, zero);
9592 gimple_call_set_nothrow (call, true);
9593 new_stmt = call;
9594 data_ref = NULL_TREE;
9595 break;
9598 align =
9599 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9600 if (alignment_support_scheme == dr_aligned)
9602 gcc_assert (aligned_access_p (first_dr_info));
9603 misalign = 0;
9605 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9607 align = dr_alignment
9608 (vect_dr_behavior (vinfo, first_dr_info));
9609 misalign = 0;
9611 else
9612 misalign = DR_MISALIGNMENT (first_dr_info);
9613 if (dataref_offset == NULL_TREE
9614 && TREE_CODE (dataref_ptr) == SSA_NAME)
9615 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9616 align, misalign);
9618 if (final_mask)
9620 align = least_bit_hwi (misalign | align);
9621 tree ptr = build_int_cst (ref_type, align);
9622 gcall *call
9623 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9624 dataref_ptr, ptr,
9625 final_mask);
9626 gimple_call_set_nothrow (call, true);
9627 new_stmt = call;
9628 data_ref = NULL_TREE;
9630 else
9632 tree ltype = vectype;
9633 tree new_vtype = NULL_TREE;
9634 /* If there's no peeling for gaps but we have a gap
9635 with slp loads then load the lower half of the
9636 vector only. See get_group_load_store_type for
9637 when we apply this optimization. */
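/* E.g., a sketch assuming V4SI, group_size == 4 and
DR_GROUP_GAP == 2 (illustrative values): only the two live
elements are loaded, as a single half-width chunk, and the other
half of the vector is filled with zeros by the CONSTRUCTOR built
further below.  */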
9638 if (slp
9639 && loop_vinfo
9640 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9641 && DR_GROUP_GAP (first_stmt_info) != 0
9642 && known_eq (nunits,
9643 (group_size
9644 - DR_GROUP_GAP (first_stmt_info)) * 2)
9645 && known_eq (nunits, group_size))
9647 tree half_vtype;
9648 new_vtype
9649 = vector_vector_composition_type (vectype, 2,
9650 &half_vtype);
9651 if (new_vtype != NULL_TREE)
9652 ltype = half_vtype;
9654 tree offset
9655 = (dataref_offset ? dataref_offset
9656 : build_int_cst (ref_type, 0));
9657 if (ltype != vectype
9658 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9660 unsigned HOST_WIDE_INT gap
9661 = DR_GROUP_GAP (first_stmt_info);
9662 gap *= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9663 tree gapcst = build_int_cst (ref_type, gap);
9664 offset = size_binop (PLUS_EXPR, offset, gapcst);
9666 data_ref
9667 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9668 if (alignment_support_scheme == dr_aligned)
9670 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9671 TREE_TYPE (data_ref)
9672 = build_aligned_type (TREE_TYPE (data_ref),
9673 align * BITS_PER_UNIT);
9674 else
9675 TREE_TYPE (data_ref)
9676 = build_aligned_type (TREE_TYPE (data_ref),
9677 TYPE_ALIGN (elem_type));
9678 if (ltype != vectype)
9680 vect_copy_ref_info (data_ref,
9681 DR_REF (first_dr_info->dr));
9682 tree tem = make_ssa_name (ltype);
9683 new_stmt = gimple_build_assign (tem, data_ref);
9684 vect_finish_stmt_generation (vinfo, stmt_info,
9685 new_stmt, gsi);
9686 data_ref = NULL;
9687 vec<constructor_elt, va_gc> *v;
9688 vec_alloc (v, 2);
9689 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9691 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9692 build_zero_cst (ltype));
9693 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9695 else
9697 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9698 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9699 build_zero_cst (ltype));
9701 gcc_assert (new_vtype != NULL_TREE);
9702 if (new_vtype == vectype)
9703 new_stmt = gimple_build_assign (
9704 vec_dest, build_constructor (vectype, v));
9705 else
9707 tree new_vname = make_ssa_name (new_vtype);
9708 new_stmt = gimple_build_assign (
9709 new_vname, build_constructor (new_vtype, v));
9710 vect_finish_stmt_generation (vinfo, stmt_info,
9711 new_stmt, gsi);
9712 new_stmt = gimple_build_assign (
9713 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9714 new_vname));
9718 break;
9720 case dr_explicit_realign:
9722 tree ptr, bump;
9724 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9726 if (compute_in_loop)
9727 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9728 &realignment_token,
9729 dr_explicit_realign,
9730 dataref_ptr, NULL);
9732 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9733 ptr = copy_ssa_name (dataref_ptr);
9734 else
9735 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9736 // For explicit realign the target alignment should be
9737 // known at compile time.
9738 unsigned HOST_WIDE_INT align =
9739 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9740 new_stmt = gimple_build_assign
9741 (ptr, BIT_AND_EXPR, dataref_ptr,
9742 build_int_cst
9743 (TREE_TYPE (dataref_ptr),
9744 -(HOST_WIDE_INT) align));
9745 vect_finish_stmt_generation (vinfo, stmt_info,
9746 new_stmt, gsi);
9747 data_ref
9748 = build2 (MEM_REF, vectype, ptr,
9749 build_int_cst (ref_type, 0));
9750 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9751 vec_dest = vect_create_destination_var (scalar_dest,
9752 vectype);
9753 new_stmt = gimple_build_assign (vec_dest, data_ref);
9754 new_temp = make_ssa_name (vec_dest, new_stmt);
9755 gimple_assign_set_lhs (new_stmt, new_temp);
9756 gimple_move_vops (new_stmt, stmt_info->stmt);
9757 vect_finish_stmt_generation (vinfo, stmt_info,
9758 new_stmt, gsi);
9759 msq = new_temp;
9761 bump = size_binop (MULT_EXPR, vs,
9762 TYPE_SIZE_UNIT (elem_type));
9763 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9764 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9765 stmt_info, bump);
9766 new_stmt = gimple_build_assign
9767 (NULL_TREE, BIT_AND_EXPR, ptr,
9768 build_int_cst
9769 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9770 ptr = copy_ssa_name (ptr, new_stmt);
9771 gimple_assign_set_lhs (new_stmt, ptr);
9772 vect_finish_stmt_generation (vinfo, stmt_info,
9773 new_stmt, gsi);
9774 data_ref
9775 = build2 (MEM_REF, vectype, ptr,
9776 build_int_cst (ref_type, 0));
9777 break;
9779 case dr_explicit_realign_optimized:
9781 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9782 new_temp = copy_ssa_name (dataref_ptr);
9783 else
9784 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9785 // We should only be doing this if we know the target
9786 // alignment at compile time.
9787 unsigned HOST_WIDE_INT align =
9788 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9789 new_stmt = gimple_build_assign
9790 (new_temp, BIT_AND_EXPR, dataref_ptr,
9791 build_int_cst (TREE_TYPE (dataref_ptr),
9792 -(HOST_WIDE_INT) align));
9793 vect_finish_stmt_generation (vinfo, stmt_info,
9794 new_stmt, gsi);
9795 data_ref
9796 = build2 (MEM_REF, vectype, new_temp,
9797 build_int_cst (ref_type, 0));
9798 break;
9800 default:
9801 gcc_unreachable ();
9803 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9804 /* DATA_REF is null if we've already built the statement. */
9805 if (data_ref)
9807 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9808 new_stmt = gimple_build_assign (vec_dest, data_ref);
9810 new_temp = make_ssa_name (vec_dest, new_stmt);
9811 gimple_set_lhs (new_stmt, new_temp);
9812 new_stmt_info
9813 = vect_finish_stmt_generation (vinfo, stmt_info,
9814 new_stmt, gsi);
9816 /* 3. Handle explicit realignment if necessary/supported.
9817 Create in loop:
9818 vec_dest = realign_load (msq, lsq, realignment_token) */
9819 if (alignment_support_scheme == dr_explicit_realign_optimized
9820 || alignment_support_scheme == dr_explicit_realign)
9822 lsq = gimple_assign_lhs (new_stmt);
9823 if (!realignment_token)
9824 realignment_token = dataref_ptr;
9825 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9826 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9827 msq, lsq, realignment_token);
9828 new_temp = make_ssa_name (vec_dest, new_stmt);
9829 gimple_assign_set_lhs (new_stmt, new_temp);
9830 new_stmt_info
9831 = vect_finish_stmt_generation (vinfo, stmt_info,
9832 new_stmt, gsi);
9834 if (alignment_support_scheme == dr_explicit_realign_optimized)
9836 gcc_assert (phi);
9837 if (i == vec_num - 1 && j == ncopies - 1)
9838 add_phi_arg (phi, lsq,
9839 loop_latch_edge (containing_loop),
9840 UNKNOWN_LOCATION);
9841 msq = lsq;
9845 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9847 tree perm_mask = perm_mask_for_reverse (vectype);
9848 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9849 perm_mask, stmt_info, gsi);
9850 new_stmt_info = vinfo->lookup_def (new_temp);
9853 /* Collect vector loads and later create their permutation in
9854 vect_transform_grouped_load (). */
9855 if (grouped_load || slp_perm)
9856 dr_chain.quick_push (new_temp);
9858 /* Store vector loads in the corresponding SLP_NODE. */
9859 if (slp && !slp_perm)
9860 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9862 /* With an SLP permutation we load the gaps as well; without one
9863 we need to skip the gaps once we have fully loaded
9864 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9865 group_elt += nunits;
9866 if (maybe_ne (group_gap_adj, 0U)
9867 && !slp_perm
9868 && known_eq (group_elt, group_size - group_gap_adj))
9870 poly_wide_int bump_val
9871 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9872 * group_gap_adj);
9873 tree bump = wide_int_to_tree (sizetype, bump_val);
9874 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9875 gsi, stmt_info, bump);
9876 group_elt = 0;
9879 /* Bump the vector pointer to account for a gap or for excess
9880 elements loaded for a permuted SLP load. */
9881 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9883 poly_wide_int bump_val
9884 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9885 * group_gap_adj);
9886 tree bump = wide_int_to_tree (sizetype, bump_val);
9887 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9888 stmt_info, bump);
9892 if (slp && !slp_perm)
9893 continue;
9895 if (slp_perm)
9897 unsigned n_perms;
9898 if (!vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9899 slp_node_instance, false,
9900 &n_perms))
9902 dr_chain.release ();
9903 return false;
9906 else
9908 if (grouped_load)
9910 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9911 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9912 group_size, gsi);
9913 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9915 else
9917 if (j == 0)
9918 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9919 else
9920 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9921 prev_stmt_info = new_stmt_info;
9924 dr_chain.release ();
9927 return true;
9930 /* Function vect_is_simple_cond.
9932 Input:
9933 LOOP - the loop that is being vectorized.
9934 COND - Condition that is checked for simple use.
9936 Output:
9937 *COMP_VECTYPE - the vector type for the comparison.
9938 *DTS - The def types for the arguments of the comparison
9940 Returns whether a COND can be vectorized. Checks whether
9941 condition operands are supportable using vect_is_simple_use. */
9943 static bool
9944 vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
9945 tree *comp_vectype, enum vect_def_type *dts,
9946 tree vectype)
9948 tree lhs, rhs;
9949 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9951 /* Mask case. */
9952 if (TREE_CODE (cond) == SSA_NAME
9953 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9955 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9956 || !*comp_vectype
9957 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9958 return false;
9959 return true;
9962 if (!COMPARISON_CLASS_P (cond))
9963 return false;
9965 lhs = TREE_OPERAND (cond, 0);
9966 rhs = TREE_OPERAND (cond, 1);
9968 if (TREE_CODE (lhs) == SSA_NAME)
9970 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
9971 return false;
9973 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9974 || TREE_CODE (lhs) == FIXED_CST)
9975 dts[0] = vect_constant_def;
9976 else
9977 return false;
9979 if (TREE_CODE (rhs) == SSA_NAME)
9981 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
9982 return false;
9984 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9985 || TREE_CODE (rhs) == FIXED_CST)
9986 dts[1] = vect_constant_def;
9987 else
9988 return false;
9990 if (vectype1 && vectype2
9991 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9992 TYPE_VECTOR_SUBPARTS (vectype2)))
9993 return false;
9995 *comp_vectype = vectype1 ? vectype1 : vectype2;
9996 /* Invariant comparison. */
9997 if (! *comp_vectype)
9999 tree scalar_type = TREE_TYPE (lhs);
10000 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10001 *comp_vectype = truth_type_for (vectype);
10002 else
10004 /* If we can widen the comparison to match vectype do so. */
10005 if (INTEGRAL_TYPE_P (scalar_type)
10006 && !slp_node
10007 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10008 TYPE_SIZE (TREE_TYPE (vectype))))
10009 scalar_type = build_nonstandard_integer_type
10010 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
10011 TYPE_UNSIGNED (scalar_type));
10012 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10013 slp_node);
10017 return true;
10020 /* vectorizable_condition.
10022 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10023 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10024 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10025 at GSI.
10027 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10029 Return true if STMT_INFO is vectorizable in this way. */
10031 static bool
10032 vectorizable_condition (vec_info *vinfo,
10033 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10034 stmt_vec_info *vec_stmt,
10035 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10037 tree scalar_dest = NULL_TREE;
10038 tree vec_dest = NULL_TREE;
10039 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10040 tree then_clause, else_clause;
10041 tree comp_vectype = NULL_TREE;
10042 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10043 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10044 tree vec_compare;
10045 tree new_temp;
10046 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10047 enum vect_def_type dts[4]
10048 = {vect_unknown_def_type, vect_unknown_def_type,
10049 vect_unknown_def_type, vect_unknown_def_type};
10050 int ndts = 4;
10051 int ncopies;
10052 int vec_num;
10053 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10054 stmt_vec_info prev_stmt_info = NULL;
10055 int i, j;
10056 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10057 vec<tree> vec_oprnds0 = vNULL;
10058 vec<tree> vec_oprnds1 = vNULL;
10059 vec<tree> vec_oprnds2 = vNULL;
10060 vec<tree> vec_oprnds3 = vNULL;
10061 tree vec_cmp_type;
10062 bool masked = false;
10064 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10065 return false;
10067 /* Is vectorizable conditional operation? */
10068 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10069 if (!stmt)
10070 return false;
10072 code = gimple_assign_rhs_code (stmt);
10073 if (code != COND_EXPR)
10074 return false;
10076 stmt_vec_info reduc_info = NULL;
10077 int reduc_index = -1;
10078 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10079 bool for_reduction
10080 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10081 if (for_reduction)
10083 if (STMT_SLP_TYPE (stmt_info))
10084 return false;
10085 reduc_info = info_for_reduction (vinfo, stmt_info);
10086 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10087 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10088 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10089 || reduc_index != -1);
10091 else
10093 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10094 return false;
10096 /* FORNOW: only supported as part of a reduction. */
10097 if (STMT_VINFO_LIVE_P (stmt_info))
10099 if (dump_enabled_p ())
10100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10101 "value used after loop.\n");
10102 return false;
10106 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10107 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10109 if (slp_node)
10111 ncopies = 1;
10112 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10114 else
10116 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10117 vec_num = 1;
10120 gcc_assert (ncopies >= 1);
10121 if (for_reduction && ncopies > 1)
10122 return false; /* FORNOW */
10124 cond_expr = gimple_assign_rhs1 (stmt);
10125 then_clause = gimple_assign_rhs2 (stmt);
10126 else_clause = gimple_assign_rhs3 (stmt);
10128 if (!vect_is_simple_cond (cond_expr, vinfo, slp_node,
10129 &comp_vectype, &dts[0], vectype)
10130 || !comp_vectype)
10131 return false;
10133 if (!vect_is_simple_use (then_clause, vinfo, &dts[2], &vectype1))
10134 return false;
10135 if (!vect_is_simple_use (else_clause, vinfo, &dts[3], &vectype2))
10136 return false;
10138 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10139 return false;
10141 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10142 return false;
10144 masked = !COMPARISON_CLASS_P (cond_expr);
10145 vec_cmp_type = truth_type_for (comp_vectype);
10147 if (vec_cmp_type == NULL_TREE)
10148 return false;
10150 cond_code = TREE_CODE (cond_expr);
10151 if (!masked)
10153 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10154 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10157 /* For conditional reductions, the "then" value needs to be the candidate
10158 value calculated by this iteration while the "else" value needs to be
10159 the result carried over from previous iterations. If the COND_EXPR
10160 is the other way around, we need to swap it. */
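/* E.g., a minimal sketch: a reduction written as
res = cond ? res : val
has the carried value in the "then" arm, so it is handled as
res = !cond ? val : res
either by inverting the comparison or, for a mask condition, by
inverting the mask.  */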
10161 bool must_invert_cmp_result = false;
10162 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10164 if (masked)
10165 must_invert_cmp_result = true;
10166 else
10168 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10169 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10170 if (new_code == ERROR_MARK)
10171 must_invert_cmp_result = true;
10172 else
10174 cond_code = new_code;
10175 /* Make sure we don't accidentally use the old condition. */
10176 cond_expr = NULL_TREE;
10179 std::swap (then_clause, else_clause);
10182 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10184 /* Boolean values may have another representation in vectors
10185 and therefore we prefer bit operations over comparison for
10186 them (which also works for scalar masks). We store opcodes
10187 to use in bitop1 and bitop2. Statement is vectorized as
10188 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10189 depending on bitop1 and bitop2 arity. */
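/* For instance (a sketch for boolean operands): a > b is emitted as
tmp = ~b; res = a & tmp, i.e. bitop1 == BIT_NOT_EXPR and
bitop2 == BIT_AND_EXPR as set just below.  */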
10190 switch (cond_code)
10192 case GT_EXPR:
10193 bitop1 = BIT_NOT_EXPR;
10194 bitop2 = BIT_AND_EXPR;
10195 break;
10196 case GE_EXPR:
10197 bitop1 = BIT_NOT_EXPR;
10198 bitop2 = BIT_IOR_EXPR;
10199 break;
10200 case LT_EXPR:
10201 bitop1 = BIT_NOT_EXPR;
10202 bitop2 = BIT_AND_EXPR;
10203 std::swap (cond_expr0, cond_expr1);
10204 break;
10205 case LE_EXPR:
10206 bitop1 = BIT_NOT_EXPR;
10207 bitop2 = BIT_IOR_EXPR;
10208 std::swap (cond_expr0, cond_expr1);
10209 break;
10210 case NE_EXPR:
10211 bitop1 = BIT_XOR_EXPR;
10212 break;
10213 case EQ_EXPR:
10214 bitop1 = BIT_XOR_EXPR;
10215 bitop2 = BIT_NOT_EXPR;
10216 break;
10217 default:
10218 return false;
10220 cond_code = SSA_NAME;
10223 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10224 && reduction_type == EXTRACT_LAST_REDUCTION
10225 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10227 if (dump_enabled_p ())
10228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10229 "reduction comparison operation not supported.\n");
10230 return false;
10233 if (!vec_stmt)
10235 if (bitop1 != NOP_EXPR)
10237 machine_mode mode = TYPE_MODE (comp_vectype);
10238 optab optab;
10240 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10241 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10242 return false;
10244 if (bitop2 != NOP_EXPR)
10246 optab = optab_for_tree_code (bitop2, comp_vectype,
10247 optab_default);
10248 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10249 return false;
10253 if (loop_vinfo
10254 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
10255 && reduction_type == EXTRACT_LAST_REDUCTION)
10256 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10257 ncopies * vec_num, vectype, NULL);
10259 vect_cost_for_stmt kind = vector_stmt;
10260 if (reduction_type == EXTRACT_LAST_REDUCTION)
10261 /* Count one reduction-like operation per vector. */
10262 kind = vec_to_scalar;
10263 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10264 return false;
10266 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10267 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10268 cost_vec, kind);
10269 return true;
10272 /* Transform. */
10274 if (!slp_node)
10276 vec_oprnds0.create (1);
10277 vec_oprnds1.create (1);
10278 vec_oprnds2.create (1);
10279 vec_oprnds3.create (1);
10282 /* Handle def. */
10283 scalar_dest = gimple_assign_lhs (stmt);
10284 if (reduction_type != EXTRACT_LAST_REDUCTION)
10285 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10287 /* Handle cond expr. */
10288 for (j = 0; j < ncopies; j++)
10290 bool swap_cond_operands = false;
10292 /* See whether another part of the vectorized code applies a loop
10293 mask to the condition, or to its inverse. */
10295 vec_loop_masks *masks = NULL;
10296 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10298 if (reduction_type == EXTRACT_LAST_REDUCTION)
10299 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10300 else
10302 scalar_cond_masked_key cond (cond_expr, ncopies);
10303 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10304 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10305 else
10307 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10308 cond.code = invert_tree_comparison (cond.code, honor_nans);
10309 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10311 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10312 cond_code = cond.code;
10313 swap_cond_operands = true;
10319 stmt_vec_info new_stmt_info = NULL;
10320 if (j == 0)
10322 if (slp_node)
10324 auto_vec<vec<tree>, 4> vec_defs;
10325 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
10326 vec_oprnds3 = vec_defs.pop ();
10327 vec_oprnds2 = vec_defs.pop ();
10328 if (!masked)
10329 vec_oprnds1 = vec_defs.pop ();
10330 vec_oprnds0 = vec_defs.pop ();
10332 else
10334 if (masked)
10336 vec_cond_lhs
10337 = vect_get_vec_def_for_operand (vinfo, cond_expr, stmt_info,
10338 comp_vectype);
10340 else
10342 vec_cond_lhs
10343 = vect_get_vec_def_for_operand (vinfo, cond_expr0,
10344 stmt_info, comp_vectype);
10345 vec_cond_rhs
10346 = vect_get_vec_def_for_operand (vinfo, cond_expr1,
10347 stmt_info, comp_vectype);
10349 vec_then_clause = vect_get_vec_def_for_operand (vinfo,
10350 then_clause,
10351 stmt_info);
10352 if (reduction_type != EXTRACT_LAST_REDUCTION)
10353 vec_else_clause = vect_get_vec_def_for_operand (vinfo,
10354 else_clause,
10355 stmt_info);
10358 else
10360 vec_cond_lhs
10361 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10362 if (!masked)
10363 vec_cond_rhs
10364 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10366 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10367 vec_oprnds2.pop ());
10368 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10369 vec_oprnds3.pop ());
10372 if (!slp_node)
10374 vec_oprnds0.quick_push (vec_cond_lhs);
10375 if (!masked)
10376 vec_oprnds1.quick_push (vec_cond_rhs);
10377 vec_oprnds2.quick_push (vec_then_clause);
10378 vec_oprnds3.quick_push (vec_else_clause);
10381 /* Arguments are ready. Create the new vector stmt. */
10382 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10384 vec_then_clause = vec_oprnds2[i];
10385 vec_else_clause = vec_oprnds3[i];
10387 if (swap_cond_operands)
10388 std::swap (vec_then_clause, vec_else_clause);
10390 if (masked)
10391 vec_compare = vec_cond_lhs;
10392 else
10394 vec_cond_rhs = vec_oprnds1[i];
10395 if (bitop1 == NOP_EXPR)
10396 vec_compare = build2 (cond_code, vec_cmp_type,
10397 vec_cond_lhs, vec_cond_rhs);
10398 else
10400 new_temp = make_ssa_name (vec_cmp_type);
10401 gassign *new_stmt;
10402 if (bitop1 == BIT_NOT_EXPR)
10403 new_stmt = gimple_build_assign (new_temp, bitop1,
10404 vec_cond_rhs);
10405 else
10406 new_stmt
10407 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10408 vec_cond_rhs);
10409 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10410 if (bitop2 == NOP_EXPR)
10411 vec_compare = new_temp;
10412 else if (bitop2 == BIT_NOT_EXPR)
10414 /* Instead of doing ~x ? y : z do x ? z : y. */
10415 vec_compare = new_temp;
10416 std::swap (vec_then_clause, vec_else_clause);
10418 else
10420 vec_compare = make_ssa_name (vec_cmp_type);
10421 new_stmt
10422 = gimple_build_assign (vec_compare, bitop2,
10423 vec_cond_lhs, new_temp);
10424 vect_finish_stmt_generation (vinfo, stmt_info,
10425 new_stmt, gsi);
10430 /* If we decided to apply a loop mask to the result of the vector
10431 comparison, AND the comparison with the mask now. Later passes
10432 should then be able to reuse the AND results between multiple
10433 vector statements.
10435 For example:
10436 for (int i = 0; i < 100; ++i)
10437 x[i] = y[i] ? z[i] : 10;
10439 results in following optimized GIMPLE:
10441 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10442 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10443 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10444 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10445 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10446 vect_iftmp.11_47, { 10, ... }>;
10448 instead of using masked and unmasked forms of
10449 vec != { 0, ... } (masked in the MASK_LOAD,
10450 unmasked in the VEC_COND_EXPR). */
10452 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10453 in cases where that's necessary. */
10455 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10457 if (!is_gimple_val (vec_compare))
10459 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10460 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10461 vec_compare);
10462 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10463 vec_compare = vec_compare_name;
10466 if (must_invert_cmp_result)
10468 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10469 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10470 BIT_NOT_EXPR,
10471 vec_compare);
10472 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10473 vec_compare = vec_compare_name;
10476 if (masks)
10478 unsigned vec_num = vec_oprnds0.length ();
10479 tree loop_mask
10480 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10481 vectype, vec_num * j + i);
10482 tree tmp2 = make_ssa_name (vec_cmp_type);
10483 gassign *g
10484 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10485 loop_mask);
10486 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10487 vec_compare = tmp2;
10491 if (reduction_type == EXTRACT_LAST_REDUCTION)
10493 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10494 tree lhs = gimple_get_lhs (old_stmt);
10495 gcall *new_stmt = gimple_build_call_internal
10496 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10497 vec_then_clause);
10498 gimple_call_set_lhs (new_stmt, lhs);
10499 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10500 if (old_stmt == gsi_stmt (*gsi))
10501 new_stmt_info = vect_finish_replace_stmt (vinfo,
10502 stmt_info, new_stmt);
10503 else
10505 /* In this case we're moving the definition to later in the
10506 block. That doesn't matter because the only uses of the
10507 lhs are in phi statements. */
10508 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10509 gsi_remove (&old_gsi, true);
10510 new_stmt_info
10511 = vect_finish_stmt_generation (vinfo, stmt_info,
10512 new_stmt, gsi);
10515 else
10517 new_temp = make_ssa_name (vec_dest);
10518 gassign *new_stmt
10519 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10520 vec_then_clause, vec_else_clause);
10521 new_stmt_info
10522 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10524 if (slp_node)
10525 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10528 if (slp_node)
10529 continue;
10531 if (j == 0)
10532 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10533 else
10534 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10536 prev_stmt_info = new_stmt_info;
10539 vec_oprnds0.release ();
10540 vec_oprnds1.release ();
10541 vec_oprnds2.release ();
10542 vec_oprnds3.release ();
10544 return true;
10547 /* vectorizable_comparison.
10549 Check if STMT_INFO is a comparison expression that can be vectorized.
10550 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10551 comparison, put it in VEC_STMT, and insert it at GSI.
10553 Return true if STMT_INFO is vectorizable in this way. */
10555 static bool
10556 vectorizable_comparison (vec_info *vinfo,
10557 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10558 stmt_vec_info *vec_stmt,
10559 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10561 tree lhs, rhs1, rhs2;
10562 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10563 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10564 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10565 tree new_temp;
10566 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10567 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10568 int ndts = 2;
10569 poly_uint64 nunits;
10570 int ncopies;
10571 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10572 stmt_vec_info prev_stmt_info = NULL;
10573 int i, j;
10574 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10575 vec<tree> vec_oprnds0 = vNULL;
10576 vec<tree> vec_oprnds1 = vNULL;
10577 tree mask_type;
10578 tree mask;
10580 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10581 return false;
10583 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10584 return false;
10586 mask_type = vectype;
10587 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10589 if (slp_node)
10590 ncopies = 1;
10591 else
10592 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10594 gcc_assert (ncopies >= 1);
10595 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10596 return false;
10598 if (STMT_VINFO_LIVE_P (stmt_info))
10600 if (dump_enabled_p ())
10601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10602 "value used after loop.\n");
10603 return false;
10606 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10607 if (!stmt)
10608 return false;
10610 code = gimple_assign_rhs_code (stmt);
10612 if (TREE_CODE_CLASS (code) != tcc_comparison)
10613 return false;
10615 rhs1 = gimple_assign_rhs1 (stmt);
10616 rhs2 = gimple_assign_rhs2 (stmt);
10618 if (!vect_is_simple_use (rhs1, vinfo, &dts[0], &vectype1))
10619 return false;
10621 if (!vect_is_simple_use (rhs2, vinfo, &dts[1], &vectype2))
10622 return false;
10624 if (vectype1 && vectype2
10625 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10626 TYPE_VECTOR_SUBPARTS (vectype2)))
10627 return false;
10629 vectype = vectype1 ? vectype1 : vectype2;
10631 /* Invariant comparison. */
10632 if (!vectype)
10634 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10635 vectype = mask_type;
10636 else
10637 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10638 slp_node);
10639 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10640 return false;
10642 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10643 return false;
10645 /* Can't compare mask and non-mask types. */
10646 if (vectype1 && vectype2
10647 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10648 return false;
10650 /* Boolean values may have another representation in vectors
10651 and therefore we prefer bit operations over comparison for
10652 them (which also works for scalar masks). We store opcodes
10653 to use in bitop1 and bitop2. Statement is vectorized as
10654 BITOP2 (rhs1 BITOP1 rhs2) or
10655 rhs1 BITOP2 (BITOP1 rhs2)
10656 depending on bitop1 and bitop2 arity. */
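/* For instance (a sketch for boolean operands): a != b becomes
res = a ^ b, while a == b becomes tmp = a ^ b; res = ~tmp,
matching the bitop1/bitop2 choices below.  */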
10657 bool swap_p = false;
10658 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10660 if (code == GT_EXPR)
10662 bitop1 = BIT_NOT_EXPR;
10663 bitop2 = BIT_AND_EXPR;
10665 else if (code == GE_EXPR)
10667 bitop1 = BIT_NOT_EXPR;
10668 bitop2 = BIT_IOR_EXPR;
10670 else if (code == LT_EXPR)
10672 bitop1 = BIT_NOT_EXPR;
10673 bitop2 = BIT_AND_EXPR;
10674 swap_p = true;
10676 else if (code == LE_EXPR)
10678 bitop1 = BIT_NOT_EXPR;
10679 bitop2 = BIT_IOR_EXPR;
10680 swap_p = true;
10682 else
10684 bitop1 = BIT_XOR_EXPR;
10685 if (code == EQ_EXPR)
10686 bitop2 = BIT_NOT_EXPR;
10690 if (!vec_stmt)
10692 if (bitop1 == NOP_EXPR)
10694 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10695 return false;
10697 else
10699 machine_mode mode = TYPE_MODE (vectype);
10700 optab optab;
10702 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10703 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10704 return false;
10706 if (bitop2 != NOP_EXPR)
10708 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10709 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10710 return false;
10714 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10715 vect_model_simple_cost (vinfo, stmt_info,
10716 ncopies * (1 + (bitop2 != NOP_EXPR)),
10717 dts, ndts, slp_node, cost_vec);
10718 return true;
10721 /* Transform. */
10722 if (!slp_node)
10724 vec_oprnds0.create (1);
10725 vec_oprnds1.create (1);
10728 /* Handle def. */
10729 lhs = gimple_assign_lhs (stmt);
10730 mask = vect_create_destination_var (lhs, mask_type);
10732 /* Handle cmp expr. */
10733 for (j = 0; j < ncopies; j++)
10735 stmt_vec_info new_stmt_info = NULL;
10736 if (j == 0)
10738 if (slp_node)
10740 auto_vec<vec<tree>, 2> vec_defs;
10741 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
10742 vec_oprnds1 = vec_defs.pop ();
10743 vec_oprnds0 = vec_defs.pop ();
10744 if (swap_p)
10745 std::swap (vec_oprnds0, vec_oprnds1);
10747 else
10749 vec_rhs1 = vect_get_vec_def_for_operand (vinfo, rhs1, stmt_info,
10750 vectype);
10751 vec_rhs2 = vect_get_vec_def_for_operand (vinfo, rhs2, stmt_info,
10752 vectype);
10755 else
10757 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10758 vec_oprnds0.pop ());
10759 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10760 vec_oprnds1.pop ());
10763 if (!slp_node)
10765 if (swap_p && j == 0)
10766 std::swap (vec_rhs1, vec_rhs2);
10767 vec_oprnds0.quick_push (vec_rhs1);
10768 vec_oprnds1.quick_push (vec_rhs2);
10771 /* Arguments are ready. Create the new vector stmt. */
10772 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10774 vec_rhs2 = vec_oprnds1[i];
10776 new_temp = make_ssa_name (mask);
10777 if (bitop1 == NOP_EXPR)
10779 gassign *new_stmt = gimple_build_assign (new_temp, code,
10780 vec_rhs1, vec_rhs2);
10781 new_stmt_info
10782 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10784 else
10786 gassign *new_stmt;
10787 if (bitop1 == BIT_NOT_EXPR)
10788 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10789 else
10790 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10791 vec_rhs2);
10792 new_stmt_info
10793 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10794 if (bitop2 != NOP_EXPR)
10796 tree res = make_ssa_name (mask);
10797 if (bitop2 == BIT_NOT_EXPR)
10798 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10799 else
10800 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10801 new_temp);
10802 new_stmt_info
10803 = vect_finish_stmt_generation (vinfo, stmt_info,
10804 new_stmt, gsi);
10807 if (slp_node)
10808 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10811 if (slp_node)
10812 continue;
10814 if (j == 0)
10815 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10816 else
10817 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10819 prev_stmt_info = new_stmt_info;
10822 vec_oprnds0.release ();
10823 vec_oprnds1.release ();
10825 return true;
10828 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10829 can handle all live statements in the node. Otherwise return true
10830 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10831 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10833 static bool
10834 can_vectorize_live_stmts (loop_vec_info loop_vinfo,
10835 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10836 slp_tree slp_node, slp_instance slp_node_instance,
10837 bool vec_stmt_p,
10838 stmt_vector_for_cost *cost_vec)
10840 if (slp_node)
10842 stmt_vec_info slp_stmt_info;
10843 unsigned int i;
10844 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10846 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10847 && !vectorizable_live_operation (loop_vinfo,
10848 slp_stmt_info, gsi, slp_node,
10849 slp_node_instance, i,
10850 vec_stmt_p, cost_vec))
10851 return false;
10854 else if (STMT_VINFO_LIVE_P (stmt_info)
10855 && !vectorizable_live_operation (loop_vinfo, stmt_info, gsi,
10856 slp_node, slp_node_instance, -1,
10857 vec_stmt_p, cost_vec))
10858 return false;
10860 return true;
10863 /* Make sure the statement is vectorizable. */
10865 opt_result
10866 vect_analyze_stmt (vec_info *vinfo,
10867 stmt_vec_info stmt_info, bool *need_to_vectorize,
10868 slp_tree node, slp_instance node_instance,
10869 stmt_vector_for_cost *cost_vec)
10871 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10872 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10873 bool ok;
10874 gimple_seq pattern_def_seq;
10876 if (dump_enabled_p ())
10877 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10878 stmt_info->stmt);
10880 if (gimple_has_volatile_ops (stmt_info->stmt))
10881 return opt_result::failure_at (stmt_info->stmt,
10882 "not vectorized:"
10883 " stmt has volatile operands: %G\n",
10884 stmt_info->stmt);
10886 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10887 && node == NULL
10888 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10890 gimple_stmt_iterator si;
10892 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10894 stmt_vec_info pattern_def_stmt_info
10895 = vinfo->lookup_stmt (gsi_stmt (si));
10896 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10897 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10899 /* Analyze def stmt of STMT if it's a pattern stmt. */
10900 if (dump_enabled_p ())
10901 dump_printf_loc (MSG_NOTE, vect_location,
10902 "==> examining pattern def statement: %G",
10903 pattern_def_stmt_info->stmt);
10905 opt_result res
10906 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10907 need_to_vectorize, node, node_instance,
10908 cost_vec);
10909 if (!res)
10910 return res;
10915 /* Skip stmts that do not need to be vectorized. In loops this is expected
10916 to include:
10917 - the COND_EXPR which is the loop exit condition
10918 - any LABEL_EXPRs in the loop
10919 - computations that are used only for array indexing or loop control.
10920 In basic blocks we only analyze statements that are a part of some SLP
10921 instance, therefore, all the statements are relevant.
10923 A pattern statement needs to be analyzed instead of the original statement
10924 if the original statement is not relevant. Otherwise, we analyze both
10925 statements. In basic blocks we are called from some SLP instance
10926 traversal; don't analyze pattern stmts there, since the pattern stmts
10927 are already part of the SLP instance. */
10929 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10930 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10931 && !STMT_VINFO_LIVE_P (stmt_info))
10933 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10934 && pattern_stmt_info
10935 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10936 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10938 /* Analyze PATTERN_STMT instead of the original stmt. */
10939 stmt_info = pattern_stmt_info;
10940 if (dump_enabled_p ())
10941 dump_printf_loc (MSG_NOTE, vect_location,
10942 "==> examining pattern statement: %G",
10943 stmt_info->stmt);
10945 else
10947 if (dump_enabled_p ())
10948 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10950 return opt_result::success ();
10953 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10954 && node == NULL
10955 && pattern_stmt_info
10956 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10957 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10959 /* Analyze PATTERN_STMT too. */
10960 if (dump_enabled_p ())
10961 dump_printf_loc (MSG_NOTE, vect_location,
10962 "==> examining pattern statement: %G",
10963 pattern_stmt_info->stmt);
10965 opt_result res
10966 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10967 node_instance, cost_vec);
10968 if (!res)
10969 return res;
10972 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10974 case vect_internal_def:
10975 break;
10977 case vect_reduction_def:
10978 case vect_nested_cycle:
10979 gcc_assert (!bb_vinfo
10980 && (relevance == vect_used_in_outer
10981 || relevance == vect_used_in_outer_by_reduction
10982 || relevance == vect_used_by_reduction
10983 || relevance == vect_unused_in_scope
10984 || relevance == vect_used_only_live));
10985 break;
10987 case vect_induction_def:
10988 gcc_assert (!bb_vinfo);
10989 break;
10991 case vect_constant_def:
10992 case vect_external_def:
10993 case vect_unknown_def_type:
10994 default:
10995 gcc_unreachable ();
10998 if (STMT_VINFO_RELEVANT_P (stmt_info))
11000 tree type = gimple_expr_type (stmt_info->stmt);
11001 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
11002 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11003 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11004 || (call && gimple_call_lhs (call) == NULL_TREE));
11005 *need_to_vectorize = true;
11008 if (PURE_SLP_STMT (stmt_info) && !node)
11010 if (dump_enabled_p ())
11011 dump_printf_loc (MSG_NOTE, vect_location,
11012 "handled only by SLP analysis\n");
11013 return opt_result::success ();
11016 ok = true;
11017 if (!bb_vinfo
11018 && (STMT_VINFO_RELEVANT_P (stmt_info)
11019 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11020 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11021 -mveclibabi= takes preference over library functions with
11022 the simd attribute. */
11023 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11024 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11025 cost_vec)
11026 || vectorizable_conversion (vinfo, stmt_info,
11027 NULL, NULL, node, cost_vec)
11028 || vectorizable_operation (vinfo, stmt_info,
11029 NULL, NULL, node, cost_vec)
11030 || vectorizable_assignment (vinfo, stmt_info,
11031 NULL, NULL, node, cost_vec)
11032 || vectorizable_load (vinfo, stmt_info,
11033 NULL, NULL, node, node_instance, cost_vec)
11034 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11035 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11036 node, node_instance, cost_vec)
11037 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11038 NULL, NULL, node, cost_vec)
11039 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11040 || vectorizable_condition (vinfo, stmt_info,
11041 NULL, NULL, node, cost_vec)
11042 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11043 cost_vec)
11044 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11045 stmt_info, NULL, node));
11046 else
11048 if (bb_vinfo)
11049 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11050 || vectorizable_simd_clone_call (vinfo, stmt_info,
11051 NULL, NULL, node, cost_vec)
11052 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11053 cost_vec)
11054 || vectorizable_shift (vinfo, stmt_info,
11055 NULL, NULL, node, cost_vec)
11056 || vectorizable_operation (vinfo, stmt_info,
11057 NULL, NULL, node, cost_vec)
11058 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11059 cost_vec)
11060 || vectorizable_load (vinfo, stmt_info,
11061 NULL, NULL, node, node_instance, cost_vec)
11062 || vectorizable_store (vinfo, stmt_info,
11063 NULL, NULL, node, cost_vec)
11064 || vectorizable_condition (vinfo, stmt_info,
11065 NULL, NULL, node, cost_vec)
11066 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11067 cost_vec));
11070 if (!ok)
11071 return opt_result::failure_at (stmt_info->stmt,
11072 "not vectorized:"
11073 " relevant stmt not supported: %G",
11074 stmt_info->stmt);
11076 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11077 need extra handling, except for vectorizable reductions. */
11078 if (!bb_vinfo
11079 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11080 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11081 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11082 stmt_info, NULL, node, node_instance,
11083 false, cost_vec))
11084 return opt_result::failure_at (stmt_info->stmt,
11085 "not vectorized:"
11086 " live stmt not supported: %G",
11087 stmt_info->stmt);
11089 return opt_result::success ();
11093 /* Function vect_transform_stmt.
11095 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11097 bool
11098 vect_transform_stmt (vec_info *vinfo,
11099 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11100 slp_tree slp_node, slp_instance slp_node_instance)
11102 bool is_store = false;
11103 stmt_vec_info vec_stmt = NULL;
11104 bool done;
11106 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11107 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
11109 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11110 bool nested_p = (loop_vinfo
11111 && nested_in_vect_loop_p
11112 (LOOP_VINFO_LOOP (loop_vinfo), stmt_info));
11114 gimple *stmt = stmt_info->stmt;
11115 switch (STMT_VINFO_TYPE (stmt_info))
11117 case type_demotion_vec_info_type:
11118 case type_promotion_vec_info_type:
11119 case type_conversion_vec_info_type:
11120 done = vectorizable_conversion (vinfo, stmt_info,
11121 gsi, &vec_stmt, slp_node, NULL);
11122 gcc_assert (done);
11123 break;
11125 case induc_vec_info_type:
11126 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11127 stmt_info, gsi, &vec_stmt, slp_node,
11128 NULL);
11129 gcc_assert (done);
11130 break;
11132 case shift_vec_info_type:
11133 done = vectorizable_shift (vinfo, stmt_info,
11134 gsi, &vec_stmt, slp_node, NULL);
11135 gcc_assert (done);
11136 break;
11138 case op_vec_info_type:
11139 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11140 NULL);
11141 gcc_assert (done);
11142 break;
11144 case assignment_vec_info_type:
11145 done = vectorizable_assignment (vinfo, stmt_info,
11146 gsi, &vec_stmt, slp_node, NULL);
11147 gcc_assert (done);
11148 break;
11150 case load_vec_info_type:
11151 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11152 slp_node_instance, NULL);
11153 gcc_assert (done);
11154 break;
11156 case store_vec_info_type:
11157 done = vectorizable_store (vinfo, stmt_info,
11158 gsi, &vec_stmt, slp_node, NULL);
11159 gcc_assert (done);
11160 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11162 /* In case of interleaving, the whole chain is vectorized when the
11163 last store in the chain is reached. Store stmts before the last
11164 one are skipped, and their vec_stmt_info shouldn't be freed
11165 meanwhile. */
11166 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11167 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11168 is_store = true;
11170 else
11171 is_store = true;
11172 break;
11174 case condition_vec_info_type:
11175 done = vectorizable_condition (vinfo, stmt_info,
11176 gsi, &vec_stmt, slp_node, NULL);
11177 gcc_assert (done);
11178 break;
11180 case comparison_vec_info_type:
11181 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11182 slp_node, NULL);
11183 gcc_assert (done);
11184 break;
11186 case call_vec_info_type:
11187 done = vectorizable_call (vinfo, stmt_info,
11188 gsi, &vec_stmt, slp_node, NULL);
11189 stmt = gsi_stmt (*gsi);
11190 break;
11192 case call_simd_clone_vec_info_type:
11193 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11194 slp_node, NULL);
11195 stmt = gsi_stmt (*gsi);
11196 break;
11198 case reduc_vec_info_type:
11199 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11200 gsi, &vec_stmt, slp_node);
11201 gcc_assert (done);
11202 break;
11204 case cycle_phi_info_type:
11205 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11206 &vec_stmt, slp_node, slp_node_instance);
11207 gcc_assert (done);
11208 break;
11210 case lc_phi_info_type:
11211 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11212 stmt_info, &vec_stmt, slp_node);
11213 gcc_assert (done);
11214 break;
11216 default:
11217 if (!STMT_VINFO_LIVE_P (stmt_info))
11219 if (dump_enabled_p ())
11220 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11221 "stmt not supported.\n");
11222 gcc_unreachable ();
11224 done = true;
11227 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11228 This would break hybrid SLP vectorization. */
11229 if (slp_node)
11230 gcc_assert (!vec_stmt
11231 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
11233 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11234 is being vectorized, but outside the immediately enclosing loop. */
11235 if (vec_stmt
11236 && nested_p
11237 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11238 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
11239 || STMT_VINFO_RELEVANT (stmt_info) ==
11240 vect_used_in_outer_by_reduction))
11242 class loop *innerloop = LOOP_VINFO_LOOP (loop_vinfo)->inner;
11243 imm_use_iterator imm_iter;
11244 use_operand_p use_p;
11245 tree scalar_dest;
11247 if (dump_enabled_p ())
11248 dump_printf_loc (MSG_NOTE, vect_location,
11249 "Record the vdef for outer-loop vectorization.\n");
11251 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11252 (to be used when vectorizing outer-loop stmts that use the DEF of
11253 STMT). */
11254 if (gimple_code (stmt) == GIMPLE_PHI)
11255 scalar_dest = PHI_RESULT (stmt);
11256 else
11257 scalar_dest = gimple_get_lhs (stmt);
11259 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11260 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11262 stmt_vec_info exit_phi_info
11263 = vinfo->lookup_stmt (USE_STMT (use_p));
11264 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11268 if (vec_stmt)
11269 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11271 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11272 return is_store;
11274 /* If this stmt defines a value used on a backedge, update the
11275 vectorized PHIs. */
11276 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
11277 stmt_vec_info reduc_info;
11278 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
11279 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
11280 && (reduc_info = info_for_reduction (vinfo, orig_stmt_info))
11281 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
11282 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
11284 gphi *phi;
11285 edge e;
11286 if (!slp_node
11287 && (phi = dyn_cast <gphi *>
11288 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
11289 && dominated_by_p (CDI_DOMINATORS,
11290 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
11291 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
11292 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
11293 == gimple_get_lhs (orig_stmt_info->stmt)))
11295 stmt_vec_info phi_info
11296 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
11297 stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
11300 add_phi_arg (as_a <gphi *> (phi_info->stmt),
11301 gimple_get_lhs (vec_stmt->stmt), e,
11302 gimple_phi_arg_location (phi, e->dest_idx));
11303 phi_info = STMT_VINFO_RELATED_STMT (phi_info);
11304 vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
11306 while (phi_info);
11307 gcc_assert (!vec_stmt);
11309 else if (slp_node
11310 && slp_node != slp_node_instance->reduc_phis)
11312 slp_tree phi_node = slp_node_instance->reduc_phis;
11313 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
11314 e = loop_latch_edge (gimple_bb (phi)->loop_father);
11315 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
11316 == SLP_TREE_VEC_STMTS (slp_node).length ());
11317 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
11318 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
11319 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
11320 e, gimple_phi_arg_location (phi, e->dest_idx));
11324 /* Handle stmts whose DEF is used outside the loop-nest that is
11325 being vectorized. */
11326 if (is_a <loop_vec_info> (vinfo))
11327 done = can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11328 stmt_info, gsi, slp_node,
11329 slp_node_instance, true, NULL);
11330 gcc_assert (done);
11332 return false;
11336 /* Remove a group of stores (for SLP or interleaving), free their
11337 stmt_vec_info. */
11339 void
11340 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11342 stmt_vec_info next_stmt_info = first_stmt_info;
11344 while (next_stmt_info)
11346 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11347 next_stmt_info = vect_orig_stmt (next_stmt_info);
11348 /* Free the attached stmt_vec_info and remove the stmt. */
11349 vinfo->remove_stmt (next_stmt_info);
11350 next_stmt_info = tmp;
11354 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11355 elements of type SCALAR_TYPE, or null if the target doesn't support
11356 such a type.
11358 If NUNITS is zero, return a vector type that contains elements of
11359 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11361 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11362 for this vectorization region and want to "autodetect" the best choice.
11363 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11364 and we want the new type to be interoperable with it. PREVAILING_MODE
11365 in this case can be a scalar integer mode or a vector mode; when it
11366 is a vector mode, the function acts like a tree-level version of
11367 related_vector_mode. */
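   A minimal usage sketch (hypothetical values; assumes a target whose
   preferred SImode SIMD mode is V4SImode):

     tree v = get_related_vectype_for_scalar_type (VOIDmode,
						    intSI_type_node, 0);

   Here V would be "vector(4) int" with TYPE_MODE (V) == V4SImode.  A
   nonzero NUNITS requires a non-VOIDmode PREVAILING_MODE, in which case
   the lookup goes through related_vector_mode as described above.  */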
11369 tree
11370 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11371 tree scalar_type, poly_uint64 nunits)
11373 tree orig_scalar_type = scalar_type;
11374 scalar_mode inner_mode;
11375 machine_mode simd_mode;
11376 tree vectype;
11378 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11379 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11380 return NULL_TREE;
11382 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11384 /* For vector types of elements whose mode precision doesn't
11385 match their type's precision we use an element type of mode
11386 precision. The vectorization routines will have to make sure
11387 they support the proper result truncation/extension.
11388 We also make sure to build vector types with INTEGER_TYPE
11389 component type only. */
11390 if (INTEGRAL_TYPE_P (scalar_type)
11391 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11392 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11393 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11394 TYPE_UNSIGNED (scalar_type));
11396 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11397 When the component mode passes the above test simply use a type
11398 corresponding to that mode. The theory is that any use that
11399 would cause problems with this will disable vectorization anyway. */
11400 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11401 && !INTEGRAL_TYPE_P (scalar_type))
11402 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11404 /* We can't build a vector type of elements with alignment bigger than
11405 their size. */
11406 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11407 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11408 TYPE_UNSIGNED (scalar_type));
11410 /* If we fell back to using the mode, fail if there was
11411 no scalar type for it. */
11412 if (scalar_type == NULL_TREE)
11413 return NULL_TREE;
11415 /* If no prevailing mode was supplied, use the mode the target prefers.
11416 Otherwise lookup a vector mode based on the prevailing mode. */
11417 if (prevailing_mode == VOIDmode)
11419 gcc_assert (known_eq (nunits, 0U));
11420 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11421 if (SCALAR_INT_MODE_P (simd_mode))
11423 /* Traditional behavior is not to take the integer mode
11424 literally, but simply to use it as a way of determining
11425 the vector size. It is up to mode_for_vector to decide
11426 what the TYPE_MODE should be.
11428 Note that nunits == 1 is allowed in order to support single
11429 element vector types. */
11430 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11431 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11432 return NULL_TREE;
11435 else if (SCALAR_INT_MODE_P (prevailing_mode)
11436 || !related_vector_mode (prevailing_mode,
11437 inner_mode, nunits).exists (&simd_mode))
11439 /* Fall back to using mode_for_vector, mostly in the hope of being
11440 able to use an integer mode. */
11441 if (known_eq (nunits, 0U)
11442 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11443 return NULL_TREE;
11445 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11446 return NULL_TREE;
11449 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11451 /* In cases where the mode was chosen by mode_for_vector, check that
11452 the target actually supports the chosen mode, or that it at least
11453 allows the vector mode to be replaced by a like-sized integer. */
11454 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11455 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11456 return NULL_TREE;
11458 /* Re-attach the address-space qualifier if we canonicalized the scalar
11459 type. */
11460 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11461 return build_qualified_type
11462 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11464 return vectype;
11467 /* Function get_vectype_for_scalar_type.
11469 Returns the vector type corresponding to SCALAR_TYPE as supported
11470 by the target. If GROUP_SIZE is nonzero and we're performing BB
11471 vectorization, make sure that the number of elements in the vector
11472 is no bigger than GROUP_SIZE. */
11474 tree
11475 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11476 unsigned int group_size)
11478 /* For BB vectorization, we should always have a group size once we've
11479 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11480 are tentative requests during things like early data reference
11481 analysis and pattern recognition. */
11482 if (is_a <bb_vec_info> (vinfo))
11483 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11484 else
11485 group_size = 0;
11487 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11488 scalar_type);
11489 if (vectype && vinfo->vector_mode == VOIDmode)
11490 vinfo->vector_mode = TYPE_MODE (vectype);
11492 /* Register the natural choice of vector type, before the group size
11493 has been applied. */
11494 if (vectype)
11495 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11497 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11498 try again with an explicit number of elements. */
11499 if (vectype
11500 && group_size
11501 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11503 /* Start with the biggest number of units that fits within
11504 GROUP_SIZE and halve it until we find a valid vector type.
11505 Usually either the first attempt will succeed or all will
11506 fail (in the latter case because GROUP_SIZE is too small
11507 for the target), but it's possible that a target could have
11508 a hole between supported vector types.
11510 If GROUP_SIZE is not a power of 2, this has the effect of
11511 trying the largest power of 2 that fits within the group,
11512 even though the group is not a multiple of that vector size.
11513 The BB vectorizer will then try to carve up the group into
11514 smaller pieces. */
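      /* Worked example (hypothetical numbers): with GROUP_SIZE == 6 the
	 first attempt uses nunits == 4 (1 << floor_log2 (6)); if no such
	 vector type exists the loop retries with nunits == 2 and then
	 gives up.  */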
11515 unsigned int nunits = 1 << floor_log2 (group_size);
11518 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11519 scalar_type, nunits);
11520 nunits /= 2;
11522 while (nunits > 1 && !vectype);
11525 return vectype;
11528 /* Return the vector type corresponding to SCALAR_TYPE as supported
11529 by the target. NODE, if nonnull, is the SLP tree node that will
11530 use the returned vector type. */
11532 tree
11533 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11535 unsigned int group_size = 0;
11536 if (node)
11538 group_size = SLP_TREE_SCALAR_OPS (node).length ();
11539 if (group_size == 0)
11540 group_size = SLP_TREE_SCALAR_STMTS (node).length ();
11542 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11545 /* Function get_mask_type_for_scalar_type.
11547 Returns the mask type corresponding to a result of comparison
11548 of vectors of specified SCALAR_TYPE as supported by target.
11549 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11550 make sure that the number of elements in the vector is no bigger
11551 than GROUP_SIZE. */
11553 tree
11554 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11555 unsigned int group_size)
11557 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11559 if (!vectype)
11560 return NULL;
11562 return truth_type_for (vectype);
11565 /* Function get_same_sized_vectype
11567 Returns a vector type corresponding to SCALAR_TYPE of size
11568 VECTOR_TYPE if supported by the target. */
11570 tree
11571 get_same_sized_vectype (tree scalar_type, tree vector_type)
11573 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11574 return truth_type_for (vector_type);
11576 poly_uint64 nunits;
11577 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11578 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11579 return NULL_TREE;
11581 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11582 scalar_type, nunits);
11585 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11586 would not change the chosen vector modes. */
11588 bool
11589 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11591 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11592 i != vinfo->used_vector_modes.end (); ++i)
11593 if (!VECTOR_MODE_P (*i)
11594 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11595 return false;
11596 return true;
11599 /* Function vect_is_simple_use.
11601 Input:
11602 VINFO - the vect info of the loop or basic block that is being vectorized.
11603 OPERAND - operand in the loop or bb.
11604 Output:
11605 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11606 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11607 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11608 the definition could be anywhere in the function
11609 DT - the type of definition
11611 Returns whether a stmt with OPERAND can be vectorized.
11612 For loops, supportable operands are constants, loop invariants, and operands
11613 that are defined by the current iteration of the loop. Unsupportable
11614 operands are those that are defined by a previous iteration of the loop (as
11615 is the case in reduction/induction computations).
11616 For basic blocks, supportable operands are constants and bb invariants.
11617 For now, operands defined outside the basic block are not supported. */
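   A hedged illustration of the classification (hypothetical loop):

     for (i = 0; i < n; i++)
       a[i] = a[i] * 4 + x;

   Inside the loop the constant 4 gives vect_constant_def, the
   loop-invariant X gives vect_external_def, and the SSA name holding
   a[i] * 4, defined in the current iteration, gives vect_internal_def.  */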
11619 bool
11620 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11621 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11623 if (def_stmt_info_out)
11624 *def_stmt_info_out = NULL;
11625 if (def_stmt_out)
11626 *def_stmt_out = NULL;
11627 *dt = vect_unknown_def_type;
11629 if (dump_enabled_p ())
11631 dump_printf_loc (MSG_NOTE, vect_location,
11632 "vect_is_simple_use: operand ");
11633 if (TREE_CODE (operand) == SSA_NAME
11634 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11635 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11636 else
11637 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11640 if (CONSTANT_CLASS_P (operand))
11641 *dt = vect_constant_def;
11642 else if (is_gimple_min_invariant (operand))
11643 *dt = vect_external_def;
11644 else if (TREE_CODE (operand) != SSA_NAME)
11645 *dt = vect_unknown_def_type;
11646 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11647 *dt = vect_external_def;
11648 else
11650 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11651 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11652 if (!stmt_vinfo)
11653 *dt = vect_external_def;
11654 else
11656 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11657 def_stmt = stmt_vinfo->stmt;
11658 switch (gimple_code (def_stmt))
11660 case GIMPLE_PHI:
11661 case GIMPLE_ASSIGN:
11662 case GIMPLE_CALL:
11663 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11664 break;
11665 default:
11666 *dt = vect_unknown_def_type;
11667 break;
11669 if (def_stmt_info_out)
11670 *def_stmt_info_out = stmt_vinfo;
11672 if (def_stmt_out)
11673 *def_stmt_out = def_stmt;
11676 if (dump_enabled_p ())
11678 dump_printf (MSG_NOTE, ", type of def: ");
11679 switch (*dt)
11681 case vect_uninitialized_def:
11682 dump_printf (MSG_NOTE, "uninitialized\n");
11683 break;
11684 case vect_constant_def:
11685 dump_printf (MSG_NOTE, "constant\n");
11686 break;
11687 case vect_external_def:
11688 dump_printf (MSG_NOTE, "external\n");
11689 break;
11690 case vect_internal_def:
11691 dump_printf (MSG_NOTE, "internal\n");
11692 break;
11693 case vect_induction_def:
11694 dump_printf (MSG_NOTE, "induction\n");
11695 break;
11696 case vect_reduction_def:
11697 dump_printf (MSG_NOTE, "reduction\n");
11698 break;
11699 case vect_double_reduction_def:
11700 dump_printf (MSG_NOTE, "double reduction\n");
11701 break;
11702 case vect_nested_cycle:
11703 dump_printf (MSG_NOTE, "nested cycle\n");
11704 break;
11705 case vect_unknown_def_type:
11706 dump_printf (MSG_NOTE, "unknown\n");
11707 break;
11711 if (*dt == vect_unknown_def_type)
11713 if (dump_enabled_p ())
11714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11715 "Unsupported pattern.\n");
11716 return false;
11719 return true;
11722 /* Function vect_is_simple_use.
11724 Same as vect_is_simple_use but also determines the vector operand
11725 type of OPERAND and stores it to *VECTYPE. If the definition of
11726 OPERAND is vect_uninitialized_def, vect_constant_def or
11727 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11728 is responsible for computing the best suited vector type for the
11729 scalar operand. */
11731 bool
11732 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11733 tree *vectype, stmt_vec_info *def_stmt_info_out,
11734 gimple **def_stmt_out)
11736 stmt_vec_info def_stmt_info;
11737 gimple *def_stmt;
11738 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11739 return false;
11741 if (def_stmt_out)
11742 *def_stmt_out = def_stmt;
11743 if (def_stmt_info_out)
11744 *def_stmt_info_out = def_stmt_info;
11746 /* Now get a vector type if the def is internal, otherwise supply
11747 NULL_TREE and leave it up to the caller to figure out a proper
11748 type for the use stmt. */
11749 if (*dt == vect_internal_def
11750 || *dt == vect_induction_def
11751 || *dt == vect_reduction_def
11752 || *dt == vect_double_reduction_def
11753 || *dt == vect_nested_cycle)
11755 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11756 gcc_assert (*vectype != NULL_TREE);
11757 if (dump_enabled_p ())
11758 dump_printf_loc (MSG_NOTE, vect_location,
11759 "vect_is_simple_use: vectype %T\n", *vectype);
11761 else if (*dt == vect_uninitialized_def
11762 || *dt == vect_constant_def
11763 || *dt == vect_external_def)
11764 *vectype = NULL_TREE;
11765 else
11766 gcc_unreachable ();
11768 return true;
11772 /* Function supportable_widening_operation
11774 Check whether an operation represented by the code CODE is a
11775 widening operation that is supported by the target platform in
11776 vector form (i.e., when operating on arguments of type VECTYPE_IN
11777 producing a result of type VECTYPE_OUT).
11779 Widening operations we currently support are NOP (CONVERT), FLOAT,
11780 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11781 are supported by the target platform either directly (via vector
11782 tree-codes), or via target builtins.
11784 Output:
11785 - CODE1 and CODE2 are codes of vector operations to be used when
11786 vectorizing the operation, if available.
11787 - MULTI_STEP_CVT determines the number of required intermediate steps in
11788 case of multi-step conversion (like char->short->int - in that case
11789 MULTI_STEP_CVT will be 1).
11790 - INTERM_TYPES contains the intermediate type required to perform the
11791 widening operation (short in the above example). */
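   A hedged worked example (assuming 128-bit vectors): widening a
   "vector(16) char" input to "vector(4) int" via CASE_CONVERT sets
   *CODE1/*CODE2 to VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, pushes an
   intermediate "vector(8) short" onto INTERM_TYPES and sets
   *MULTI_STEP_CVT to 1, provided the target implements the unpack
   optabs for both the char and short steps.  */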
11793 bool
11794 supportable_widening_operation (vec_info *vinfo,
11795 enum tree_code code, stmt_vec_info stmt_info,
11796 tree vectype_out, tree vectype_in,
11797 enum tree_code *code1, enum tree_code *code2,
11798 int *multi_step_cvt,
11799 vec<tree> *interm_types)
11801 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11802 class loop *vect_loop = NULL;
11803 machine_mode vec_mode;
11804 enum insn_code icode1, icode2;
11805 optab optab1, optab2;
11806 tree vectype = vectype_in;
11807 tree wide_vectype = vectype_out;
11808 enum tree_code c1, c2;
11809 int i;
11810 tree prev_type, intermediate_type;
11811 machine_mode intermediate_mode, prev_mode;
11812 optab optab3, optab4;
11814 *multi_step_cvt = 0;
11815 if (loop_info)
11816 vect_loop = LOOP_VINFO_LOOP (loop_info);
11818 switch (code)
11820 case WIDEN_MULT_EXPR:
11821 /* The result of a vectorized widening operation usually requires
11822 two vectors (because the widened results do not fit into one vector).
11823 The generated vector results would normally be expected to be
11824 generated in the same order as in the original scalar computation,
11825 i.e. if 8 results are generated in each vector iteration, they are
11826 to be organized as follows:
11827 vect1: [res1,res2,res3,res4],
11828 vect2: [res5,res6,res7,res8].
11830 However, in the special case that the result of the widening
11831 operation is used in a reduction computation only, the order doesn't
11832 matter (because when vectorizing a reduction we change the order of
11833 the computation). Some targets can take advantage of this and
11834 generate more efficient code. For example, targets like Altivec,
11835 that support widen_mult using a sequence of {mult_even,mult_odd}
11836 generate the following vectors:
11837 vect1: [res1,res3,res5,res7],
11838 vect2: [res2,res4,res6,res8].
11840 When vectorizing outer-loops, we execute the inner-loop sequentially
11841 (each vectorized inner-loop iteration contributes to VF outer-loop
11842 iterations in parallel). We therefore don't allow changing the
11843 order of the computation in the inner-loop during outer-loop
11844 vectorization. */
11845 /* TODO: Another case in which order doesn't *really* matter is when we
11846 widen and then contract again, e.g. (short)((int)x * y >> 8).
11847 Normally, pack_trunc performs an even/odd permute, whereas the
11848 repack from an even/odd expansion would be an interleave, which
11849 would be significantly simpler for e.g. AVX2. */
11850 /* In any case, in order to avoid duplicating the code below, recurse
11851 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11852 are properly set up for the caller. If we fail, we'll continue with
11853 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11854 if (vect_loop
11855 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11856 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11857 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11858 stmt_info, vectype_out,
11859 vectype_in, code1, code2,
11860 multi_step_cvt, interm_types))
11862 /* Elements in a vector with vect_used_by_reduction property cannot
11863 be reordered if the use chain with this property does not have the
11864 same operation. One such example is s += a * b, where elements
11865 in a and b cannot be reordered. Here we check if the vector defined
11866 by STMT is only directly used in the reduction statement. */
11867 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11868 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11869 if (use_stmt_info
11870 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11871 return true;
11873 c1 = VEC_WIDEN_MULT_LO_EXPR;
11874 c2 = VEC_WIDEN_MULT_HI_EXPR;
11875 break;
11877 case DOT_PROD_EXPR:
11878 c1 = DOT_PROD_EXPR;
11879 c2 = DOT_PROD_EXPR;
11880 break;
11882 case SAD_EXPR:
11883 c1 = SAD_EXPR;
11884 c2 = SAD_EXPR;
11885 break;
11887 case VEC_WIDEN_MULT_EVEN_EXPR:
11888 /* Support the recursion induced just above. */
11889 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11890 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11891 break;
11893 case WIDEN_LSHIFT_EXPR:
11894 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11895 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11896 break;
11898 CASE_CONVERT:
11899 c1 = VEC_UNPACK_LO_EXPR;
11900 c2 = VEC_UNPACK_HI_EXPR;
11901 break;
11903 case FLOAT_EXPR:
11904 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11905 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11906 break;
11908 case FIX_TRUNC_EXPR:
11909 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11910 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11911 break;
11913 default:
11914 gcc_unreachable ();
11917 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11918 std::swap (c1, c2);
11920 if (code == FIX_TRUNC_EXPR)
11922 /* The signedness is determined from the output operand. */
11923 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11924 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11926 else if (CONVERT_EXPR_CODE_P (code)
11927 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11928 && VECTOR_BOOLEAN_TYPE_P (vectype)
11929 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11930 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11932 /* If the input and result modes are the same, a different optab
11933 is needed where we pass in the number of units in vectype. */
11934 optab1 = vec_unpacks_sbool_lo_optab;
11935 optab2 = vec_unpacks_sbool_hi_optab;
11937 else
11939 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11940 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11943 if (!optab1 || !optab2)
11944 return false;
11946 vec_mode = TYPE_MODE (vectype);
11947 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11948 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11949 return false;
11951 *code1 = c1;
11952 *code2 = c2;
11954 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11955 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11957 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11958 return true;
11959 /* For scalar masks we may have different boolean
11960 vector types having the same QImode. Thus we
11961 add an additional check on the number of elements. */
11962 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11963 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11964 return true;
11967 /* Check if it's a multi-step conversion that can be done using intermediate
11968 types. */
11970 prev_type = vectype;
11971 prev_mode = vec_mode;
11973 if (!CONVERT_EXPR_CODE_P (code))
11974 return false;
11976 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11977 intermediate steps in the promotion sequence. We try
11978 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11979 not. */
11980 interm_types->create (MAX_INTERM_CVT_STEPS);
11981 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11983 intermediate_mode = insn_data[icode1].operand[0].mode;
11984 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11985 intermediate_type
11986 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11987 else
11988 intermediate_type
11989 = lang_hooks.types.type_for_mode (intermediate_mode,
11990 TYPE_UNSIGNED (prev_type));
11992 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11993 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11994 && intermediate_mode == prev_mode
11995 && SCALAR_INT_MODE_P (prev_mode))
11997 /* If the input and result modes are the same, a different optab
11998 is needed where we pass in the number of units in vectype. */
11999 optab3 = vec_unpacks_sbool_lo_optab;
12000 optab4 = vec_unpacks_sbool_hi_optab;
12002 else
12004 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12005 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12008 if (!optab3 || !optab4
12009 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12010 || insn_data[icode1].operand[0].mode != intermediate_mode
12011 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12012 || insn_data[icode2].operand[0].mode != intermediate_mode
12013 || ((icode1 = optab_handler (optab3, intermediate_mode))
12014 == CODE_FOR_nothing)
12015 || ((icode2 = optab_handler (optab4, intermediate_mode))
12016 == CODE_FOR_nothing))
12017 break;
12019 interm_types->quick_push (intermediate_type);
12020 (*multi_step_cvt)++;
12022 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12023 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12025 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12026 return true;
12027 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12028 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12029 return true;
12032 prev_type = intermediate_type;
12033 prev_mode = intermediate_mode;
12036 interm_types->release ();
12037 return false;
12041 /* Function supportable_narrowing_operation
12043 Check whether an operation represented by the code CODE is a
12044 narrowing operation that is supported by the target platform in
12045 vector form (i.e., when operating on arguments of type VECTYPE_IN
12046 and producing a result of type VECTYPE_OUT).
12048 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12049 and FLOAT. This function checks if these operations are supported by
12050 the target platform directly via vector tree-codes.
12052 Output:
12053 - CODE1 is the code of a vector operation to be used when
12054 vectorizing the operation, if available.
12055 - MULTI_STEP_CVT determines the number of required intermediate steps in
12056 case of multi-step conversion (like int->short->char - in that case
12057 MULTI_STEP_CVT will be 1).
12058 - INTERM_TYPES contains the intermediate type required to perform the
12059 narrowing operation (short in the above example). */
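   A hedged worked example (assuming 128-bit vectors): narrowing a
   "vector(4) int" input to "vector(16) char" via CASE_CONVERT sets
   *CODE1 to VEC_PACK_TRUNC_EXPR, pushes an intermediate
   "vector(8) short" onto INTERM_TYPES and sets *MULTI_STEP_CVT to 1,
   provided the target implements vec_pack_trunc for both V4SImode and
   V8HImode.  */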
12061 bool
12062 supportable_narrowing_operation (enum tree_code code,
12063 tree vectype_out, tree vectype_in,
12064 enum tree_code *code1, int *multi_step_cvt,
12065 vec<tree> *interm_types)
12067 machine_mode vec_mode;
12068 enum insn_code icode1;
12069 optab optab1, interm_optab;
12070 tree vectype = vectype_in;
12071 tree narrow_vectype = vectype_out;
12072 enum tree_code c1;
12073 tree intermediate_type, prev_type;
12074 machine_mode intermediate_mode, prev_mode;
12075 int i;
12076 bool uns;
12078 *multi_step_cvt = 0;
12079 switch (code)
12081 CASE_CONVERT:
12082 c1 = VEC_PACK_TRUNC_EXPR;
12083 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12084 && VECTOR_BOOLEAN_TYPE_P (vectype)
12085 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12086 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12087 optab1 = vec_pack_sbool_trunc_optab;
12088 else
12089 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12090 break;
12092 case FIX_TRUNC_EXPR:
12093 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12094 /* The signedness is determined from the output operand. */
12095 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12096 break;
12098 case FLOAT_EXPR:
12099 c1 = VEC_PACK_FLOAT_EXPR;
12100 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12101 break;
12103 default:
12104 gcc_unreachable ();
12107 if (!optab1)
12108 return false;
12110 vec_mode = TYPE_MODE (vectype);
12111 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12112 return false;
12114 *code1 = c1;
12116 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12118 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12119 return true;
12120 /* For scalar masks we may have different boolean
12121 vector types having the same QImode. Thus we
12122 add an additional check on the number of elements. */
12123 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12124 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12125 return true;
12128 if (code == FLOAT_EXPR)
12129 return false;
12131 /* Check if it's a multi-step conversion that can be done using intermediate
12132 types. */
12133 prev_mode = vec_mode;
12134 prev_type = vectype;
12135 if (code == FIX_TRUNC_EXPR)
12136 uns = TYPE_UNSIGNED (vectype_out);
12137 else
12138 uns = TYPE_UNSIGNED (vectype);
12140 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12141 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12142 costly than signed. */
12143 if (code == FIX_TRUNC_EXPR && uns)
12145 enum insn_code icode2;
12147 intermediate_type
12148 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12149 interm_optab
12150 = optab_for_tree_code (c1, intermediate_type, optab_default);
12151 if (interm_optab != unknown_optab
12152 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12153 && insn_data[icode1].operand[0].mode
12154 == insn_data[icode2].operand[0].mode)
12156 uns = false;
12157 optab1 = interm_optab;
12158 icode1 = icode2;
12162 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12163 intermediate steps in the narrowing sequence. We try
12164 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12165 interm_types->create (MAX_INTERM_CVT_STEPS);
12166 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12168 intermediate_mode = insn_data[icode1].operand[0].mode;
12169 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12170 intermediate_type
12171 = vect_double_mask_nunits (prev_type, intermediate_mode);
12172 else
12173 intermediate_type
12174 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12175 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12176 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12177 && intermediate_mode == prev_mode
12178 && SCALAR_INT_MODE_P (prev_mode))
12179 interm_optab = vec_pack_sbool_trunc_optab;
12180 else
12181 interm_optab
12182 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12183 optab_default);
12184 if (!interm_optab
12185 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12186 || insn_data[icode1].operand[0].mode != intermediate_mode
12187 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12188 == CODE_FOR_nothing))
12189 break;
12191 interm_types->quick_push (intermediate_type);
12192 (*multi_step_cvt)++;
12194 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12196 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12197 return true;
12198 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12199 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12200 return true;
12203 prev_mode = intermediate_mode;
12204 prev_type = intermediate_type;
12205 optab1 = interm_optab;
12208 interm_types->release ();
12209 return false;
12212 /* Generate and return a statement that sets vector mask MASK such that
12213 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
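   For instance (hypothetical 4-lane mask): with START_INDEX == 0 and
   END_INDEX == 3 the generated IFN_WHILE_ULT call sets MASK to
   { true, true, true, false }, activating only the first three lanes.  */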
12215 gcall *
12216 vect_gen_while (tree mask, tree start_index, tree end_index)
12218 tree cmp_type = TREE_TYPE (start_index);
12219 tree mask_type = TREE_TYPE (mask);
12220 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12221 cmp_type, mask_type,
12222 OPTIMIZE_FOR_SPEED));
12223 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12224 start_index, end_index,
12225 build_zero_cst (mask_type));
12226 gimple_call_set_lhs (call, mask);
12227 return call;
12230 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12231 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12233 tree
12234 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12235 tree end_index)
12237 tree tmp = make_ssa_name (mask_type);
12238 gcall *call = vect_gen_while (tmp, start_index, end_index);
12239 gimple_seq_add_stmt (seq, call);
12240 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12243 /* Try to compute the vector types required to vectorize STMT_INFO,
12244 returning true on success and false if vectorization isn't possible.
12245 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12246 make sure that the number of elements in the vectors is no bigger
12247 than GROUP_SIZE.
12249 On success:
12251 - Set *STMT_VECTYPE_OUT to:
12252 - NULL_TREE if the statement doesn't need to be vectorized;
12253 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12255 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12256 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12257 statement does not help to determine the overall number of units. */
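   A hedged example (hypothetical 128-bit target): for the conversion
   statement "int_x = (int) char_y", *STMT_VECTYPE_OUT would be
   "vector(4) int" while *NUNITS_VECTYPE_OUT, which tracks the smallest
   scalar type in the statement, would be "vector(16) char".  */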
12259 opt_result
12260 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12261 tree *stmt_vectype_out,
12262 tree *nunits_vectype_out,
12263 unsigned int group_size)
12265 gimple *stmt = stmt_info->stmt;
12267 /* For BB vectorization, we should always have a group size once we've
12268 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12269 are tentative requests during things like early data reference
12270 analysis and pattern recognition. */
12271 if (is_a <bb_vec_info> (vinfo))
12272 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12273 else
12274 group_size = 0;
12276 *stmt_vectype_out = NULL_TREE;
12277 *nunits_vectype_out = NULL_TREE;
12279 if (gimple_get_lhs (stmt) == NULL_TREE
12280 /* MASK_STORE has no lhs, but is ok. */
12281 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12283 if (is_a <gcall *> (stmt))
12285 /* Ignore calls with no lhs. These must be calls to
12286 #pragma omp simd functions, and what vectorization factor
12287 they really need can't be determined until
12288 vectorizable_simd_clone_call. */
12289 if (dump_enabled_p ())
12290 dump_printf_loc (MSG_NOTE, vect_location,
12291 "defer to SIMD clone analysis.\n");
12292 return opt_result::success ();
12295 return opt_result::failure_at (stmt,
12296 "not vectorized: irregular stmt.%G", stmt);
12299 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12300 return opt_result::failure_at (stmt,
12301 "not vectorized: vector stmt in loop:%G",
12302 stmt);
12304 tree vectype;
12305 tree scalar_type = NULL_TREE;
12306 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12308 vectype = STMT_VINFO_VECTYPE (stmt_info);
12309 if (dump_enabled_p ())
12310 dump_printf_loc (MSG_NOTE, vect_location,
12311 "precomputed vectype: %T\n", vectype);
12313 else if (vect_use_mask_type_p (stmt_info))
12315 unsigned int precision = stmt_info->mask_precision;
12316 scalar_type = build_nonstandard_integer_type (precision, 1);
12317 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12318 if (!vectype)
12319 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12320 " data-type %T\n", scalar_type);
12321 if (dump_enabled_p ())
12322 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12324 else
12326 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12327 scalar_type = TREE_TYPE (DR_REF (dr));
12328 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12329 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12330 else
12331 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12333 if (dump_enabled_p ())
12335 if (group_size)
12336 dump_printf_loc (MSG_NOTE, vect_location,
12337 "get vectype for scalar type (group size %d):"
12338 " %T\n", group_size, scalar_type);
12339 else
12340 dump_printf_loc (MSG_NOTE, vect_location,
12341 "get vectype for scalar type: %T\n", scalar_type);
12343 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12344 if (!vectype)
12345 return opt_result::failure_at (stmt,
12346 "not vectorized:"
12347 " unsupported data-type %T\n",
12348 scalar_type);
12350 if (dump_enabled_p ())
12351 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12353 *stmt_vectype_out = vectype;
12355 /* Don't try to compute scalar types if the stmt produces a boolean
12356 vector; use the existing vector type instead. */
12357 tree nunits_vectype = vectype;
12358 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12360 /* The number of units is set according to the smallest scalar
12361 type (or the largest vector size, but we only support one
12362 vector size per vectorization). */
12363 HOST_WIDE_INT dummy;
12364 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12365 if (scalar_type != TREE_TYPE (vectype))
12367 if (dump_enabled_p ())
12368 dump_printf_loc (MSG_NOTE, vect_location,
12369 "get vectype for smallest scalar type: %T\n",
12370 scalar_type);
12371 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12372 group_size);
12373 if (!nunits_vectype)
12374 return opt_result::failure_at
12375 (stmt, "not vectorized: unsupported data-type %T\n",
12376 scalar_type);
12377 if (dump_enabled_p ())
12378 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12379 nunits_vectype);
12383 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12384 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12386 if (dump_enabled_p ())
12388 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12389 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12390 dump_printf (MSG_NOTE, "\n");
12393 *nunits_vectype_out = nunits_vectype;
12394 return opt_result::success ();