gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
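/* An illustrative sketch (not from the original source): on a hypothetical
   target whose builtin_vectorization_cost for unaligned_load is 2, the call

       record_stmt_cost (cost_vec, 2, unaligned_load, stmt_info, vectype,
                         misalign, vect_body);

   pushes a single stmt_info_for_cost entry with count == 2 and returns the
   preliminary estimate 2 * 2 == 4; the saved entry lets the target's cost
   hooks refine that number later.  */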
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
158 tree array_ref;
159 gimple *new_stmt;
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
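/* An illustrative sketch (not from the original source): for N == 2 these
   helpers emit GIMPLE of the form

       vect_x_3 = vect_array[2];    <-- read_vector_array
       vect_array[2] = vect_y_5;    <-- write_vector_array

   where vect_array is the temporary created by create_vector_array and the
   SSA names are hypothetical.  */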
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 tree mem_ref;
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
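/* An illustrative sketch (not from the original source): for a variable
   vect_array this emits

       vect_array = {CLOBBER};

   before *GSI, marking the end of the temporary's lifetime for later
   passes.  */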
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
249 worklist->safe_push (stmt_info);
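/* An illustrative sketch (not from the original source): if S is the last
   statement of a recognized pattern, the call is redirected to the pattern
   statement STMT_VINFO_RELATED_STMT (S), and that is what gets pushed.
   Relevance is only ever raised, so a statement already marked
   vect_used_in_scope is not downgraded by a later call passing
   vect_used_only_live; if neither field changes, nothing is re-pushed.  */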
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
261 tree op;
262 ssa_op_iter iter;
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
347 *live_p = true;
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
361 return (*live_p || *relevant);
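/* An illustrative sketch (not from the original source), for a hypothetical
   loop:

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + x_1;   <-- alters memory: *relevant = vect_used_in_scope
         s_2 = s_3 + b[i];
       }
     ... = s_2;               <-- used after the loop: *live_p = true

   The increment of i is neither relevant nor live by itself; it is only
   marked (if at all) when process_use later walks the operands of the
   relevant statements.  */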
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
373 tree operand;
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
381 /* STMT has a data_ref. FORNOW this means that it's of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
413 return false;
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
422 if (operand == use)
423 return true;
425 return false;
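/* An illustrative sketch (not from the original source): for a hypothetical
   store

       a[i_1] = x_2;

   the function returns true for USE == x_2 (the stored value is a real
   operand of the vectorized store) but false for USE == i_1, which only
   takes part in the address computation.  For an internal call such as
   .MASK_STORE, the mask and the stored value are the non-indexing uses.  */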
430 /* Function process_use.
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it had already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
474 if (!dstmt_vinfo)
475 return opt_result::success ();
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
654 use_operand_p use_p;
655 ssa_op_iter iter;
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
706 default:
707 break;
710 if (is_pattern_stmt_p (stmt_vinfo))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
734 for (; i < gimple_num_ops (assign); i++)
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
749 for (i = 0; i < gimple_call_num_args (call); i++)
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
781 if (fatal)
782 *fatal = false;
783 return res;
786 } /* while worklist */
788 return opt_result::success ();
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
806 int inside_cost = 0, prologue_cost = 0;
808 gcc_assert (cost_vec != NULL);
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. */
841 static void
842 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
843 enum vect_def_type *dt,
844 unsigned int ncopies, int pwr,
845 stmt_vector_for_cost *cost_vec)
847 int i;
848 int inside_cost = 0, prologue_cost = 0;
850 for (i = 0; i < pwr + 1; i++)
852 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
853 stmt_info, 0, vect_body);
854 ncopies *= 2;
857 /* FORNOW: Assuming a maximum of 2 args per stmt. */
858 for (i = 0; i < 2; i++)
859 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
861 stmt_info, 0, vect_prologue);
863 if (dump_enabled_p ())
864 dump_printf_loc (MSG_NOTE, vect_location,
865 "vect_model_promotion_demotion_cost: inside_cost = %d, "
866 "prologue_cost = %d .\n", inside_cost, prologue_cost);
869 /* Returns true if the current function returns DECL. */
871 static bool
872 cfun_returns (tree decl)
874 edge_iterator ei;
875 edge e;
876 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
878 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
879 if (!ret)
880 continue;
881 if (gimple_return_retval (ret) == decl)
882 return true;
883 /* We often end up with an aggregate copy to the result decl,
884 handle that case as well. First skip intermediate clobbers
885 though. */
886 gimple *def = ret;
889 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
891 while (gimple_clobber_p (def));
892 if (is_a <gassign *> (def)
893 && gimple_assign_lhs (def) == gimple_return_retval (ret)
894 && gimple_assign_rhs1 (def) == decl)
895 return true;
897 return false;
900 /* Function vect_model_store_cost
902 Models cost for stores. In the case of grouped accesses, one access
903 has the overhead of the grouped access attributed to it. */
905 static void
906 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
907 vect_memory_access_type memory_access_type,
908 vec_load_store_type vls_type, slp_tree slp_node,
909 stmt_vector_for_cost *cost_vec)
911 unsigned int inside_cost = 0, prologue_cost = 0;
912 stmt_vec_info first_stmt_info = stmt_info;
913 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
915 /* ??? Somehow we need to fix this at the callers. */
916 if (slp_node)
917 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
919 if (vls_type == VLS_STORE_INVARIANT)
921 if (!slp_node)
922 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
923 stmt_info, 0, vect_prologue);
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node && grouped_access_p)
929 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p = (first_stmt_info == stmt_info);
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (first_stmt_p
941 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
943 /* Uses high and low interleave or shuffle operations for each
944 needed permute. */
945 int group_size = DR_GROUP_SIZE (first_stmt_info);
946 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
948 stmt_info, 0, vect_body);
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: strided group_size = %d .\n",
953 group_size);
956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957 /* Costs of the stores. */
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_GATHER_SCATTER)
961 /* N scalar stores plus extracting the elements. */
962 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
963 inside_cost += record_stmt_cost (cost_vec,
964 ncopies * assumed_nunits,
965 scalar_store, stmt_info, 0, vect_body);
967 else
968 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
970 if (memory_access_type == VMAT_ELEMENTWISE
971 || memory_access_type == VMAT_STRIDED_SLP)
973 /* N scalar stores plus extracting the elements. */
974 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
975 inside_cost += record_stmt_cost (cost_vec,
976 ncopies * assumed_nunits,
977 vec_to_scalar, stmt_info, 0, vect_body);
980 /* When vectorizing a store into the function result assign
981 a penalty if the function returns in a multi-register location.
982 In this case we assume we'll end up with having to spill the
983 vector result and do piecewise loads as a conservative estimate. */
984 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
985 if (base
986 && (TREE_CODE (base) == RESULT_DECL
987 || (DECL_P (base) && cfun_returns (base)))
988 && !aggregate_value_p (base, cfun->decl))
990 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
991 /* ??? Handle PARALLEL in some way. */
992 if (REG_P (reg))
994 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
995 /* Assume that a single reg-reg move is possible and cheap,
996 do not account for vector to gp register move cost. */
997 if (nregs > 1)
999 /* Spill. */
1000 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1001 vector_store,
1002 stmt_info, 0, vect_epilogue);
1003 /* Loads. */
1004 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1005 scalar_load,
1006 stmt_info, 0, vect_epilogue);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: inside_cost = %d, "
1014 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1018 /* Calculate cost of DR's memory access. */
1019 void
1020 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1021 unsigned int *inside_cost,
1022 stmt_vector_for_cost *body_cost_vec)
1024 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1025 int alignment_support_scheme
1026 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1028 switch (alignment_support_scheme)
1030 case dr_aligned:
1032 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1033 vector_store, stmt_info, 0,
1034 vect_body);
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE, vect_location,
1038 "vect_model_store_cost: aligned.\n");
1039 break;
1042 case dr_unaligned_supported:
1044 /* Here, we assign an additional cost for the unaligned store. */
1045 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1046 unaligned_store, stmt_info,
1047 DR_MISALIGNMENT (dr_info),
1048 vect_body);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "vect_model_store_cost: unaligned supported by "
1052 "hardware.\n");
1053 break;
1056 case dr_unaligned_unsupported:
1058 *inside_cost = VECT_MAX_COST;
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1062 "vect_model_store_cost: unsupported access.\n");
1063 break;
1066 default:
1067 gcc_unreachable ();
1072 /* Function vect_model_load_cost
1074 Models cost for loads. In the case of grouped accesses, one access has
1075 the overhead of the grouped access attributed to it. Since unaligned
1076 accesses are supported for loads, we also account for the costs of the
1077 access scheme chosen. */
1079 static void
1080 vect_model_load_cost (vec_info *vinfo,
1081 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1082 vect_memory_access_type memory_access_type,
1083 slp_tree slp_node,
1084 stmt_vector_for_cost *cost_vec)
1086 unsigned int inside_cost = 0, prologue_cost = 0;
1087 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1089 gcc_assert (cost_vec);
1091 /* ??? Somehow we need to fix this at the callers. */
1092 if (slp_node)
1093 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1095 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1097 /* If the load is permuted then the alignment is determined by
1098 the first group element not by the first scalar stmt DR. */
1099 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1100 /* Record the cost for the permutation. */
1101 unsigned n_perms;
1102 unsigned assumed_nunits
1103 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1104 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1105 vf, true, &n_perms);
1106 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1107 first_stmt_info, 0, vect_body);
1108 /* And adjust the number of loads performed. This handles
1109 redundancies as well as loads that are later dead. */
1110 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1111 bitmap_clear (perm);
1112 for (unsigned i = 0;
1113 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1114 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1115 ncopies = 0;
1116 bool load_seen = false;
1117 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1119 if (i % assumed_nunits == 0)
1121 if (load_seen)
1122 ncopies++;
1123 load_seen = false;
1125 if (bitmap_bit_p (perm, i))
1126 load_seen = true;
1128 if (load_seen)
1129 ncopies++;
1130 gcc_assert (ncopies
1131 <= (DR_GROUP_SIZE (first_stmt_info)
1132 - DR_GROUP_GAP (first_stmt_info)
1133 + assumed_nunits - 1) / assumed_nunits);
1136 /* Grouped loads read all elements in the group at once,
1137 so we want the DR for the first statement. */
1138 stmt_vec_info first_stmt_info = stmt_info;
1139 if (!slp_node && grouped_access_p)
1140 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1142 /* True if we should include any once-per-group costs as well as
1143 the cost of the statement itself. For SLP we only get called
1144 once per group anyhow. */
1145 bool first_stmt_p = (first_stmt_info == stmt_info);
1147 /* We assume that the cost of a single load-lanes instruction is
1148 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1149 access is instead being provided by a load-and-permute operation,
1150 include the cost of the permutes. */
1151 if (first_stmt_p
1152 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1154 /* Uses even and odd extract operations or shuffle operations
1155 for each needed permute. */
1156 int group_size = DR_GROUP_SIZE (first_stmt_info);
1157 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1158 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1159 stmt_info, 0, vect_body);
1161 if (dump_enabled_p ())
1162 dump_printf_loc (MSG_NOTE, vect_location,
1163 "vect_model_load_cost: strided group_size = %d .\n",
1164 group_size);
1167 /* The loads themselves. */
1168 if (memory_access_type == VMAT_ELEMENTWISE
1169 || memory_access_type == VMAT_GATHER_SCATTER)
1171 /* N scalar loads plus gathering them into a vector. */
1172 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1173 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1174 inside_cost += record_stmt_cost (cost_vec,
1175 ncopies * assumed_nunits,
1176 scalar_load, stmt_info, 0, vect_body);
1178 else
1179 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1180 &inside_cost, &prologue_cost,
1181 cost_vec, cost_vec, true);
1182 if (memory_access_type == VMAT_ELEMENTWISE
1183 || memory_access_type == VMAT_STRIDED_SLP)
1184 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1185 stmt_info, 0, vect_body);
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: inside_cost = %d, "
1190 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1194 /* Calculate cost of DR's memory access. */
1195 void
1196 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1197 bool add_realign_cost, unsigned int *inside_cost,
1198 unsigned int *prologue_cost,
1199 stmt_vector_for_cost *prologue_cost_vec,
1200 stmt_vector_for_cost *body_cost_vec,
1201 bool record_prologue_costs)
1203 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1204 int alignment_support_scheme
1205 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1207 switch (alignment_support_scheme)
1209 case dr_aligned:
1211 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1212 stmt_info, 0, vect_body);
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_NOTE, vect_location,
1216 "vect_model_load_cost: aligned.\n");
1218 break;
1220 case dr_unaligned_supported:
1222 /* Here, we assign an additional cost for the unaligned load. */
1223 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1224 unaligned_load, stmt_info,
1225 DR_MISALIGNMENT (dr_info),
1226 vect_body);
1228 if (dump_enabled_p ())
1229 dump_printf_loc (MSG_NOTE, vect_location,
1230 "vect_model_load_cost: unaligned supported by "
1231 "hardware.\n");
1233 break;
1235 case dr_explicit_realign:
1237 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1238 vector_load, stmt_info, 0, vect_body);
1239 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1240 vec_perm, stmt_info, 0, vect_body);
1242 /* FIXME: If the misalignment remains fixed across the iterations of
1243 the containing loop, the following cost should be added to the
1244 prologue costs. */
1245 if (targetm.vectorize.builtin_mask_for_load)
1246 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1247 stmt_info, 0, vect_body);
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: explicit realign\n");
1253 break;
1255 case dr_explicit_realign_optimized:
1257 if (dump_enabled_p ())
1258 dump_printf_loc (MSG_NOTE, vect_location,
1259 "vect_model_load_cost: unaligned software "
1260 "pipelined.\n");
1262 /* Unaligned software pipeline has a load of an address, an initial
1263 load, and possibly a mask operation to "prime" the loop. However,
1264 if this is an access in a group of loads, which provide grouped
1265 access, then the above cost should only be considered for one
1266 access in the group. Inside the loop, there is a load op
1267 and a realignment op. */
1269 if (add_realign_cost && record_prologue_costs)
1271 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1272 vector_stmt, stmt_info,
1273 0, vect_prologue);
1274 if (targetm.vectorize.builtin_mask_for_load)
1275 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1276 vector_stmt, stmt_info,
1277 0, vect_prologue);
1280 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1281 stmt_info, 0, vect_body);
1282 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1283 stmt_info, 0, vect_body);
1285 if (dump_enabled_p ())
1286 dump_printf_loc (MSG_NOTE, vect_location,
1287 "vect_model_load_cost: explicit realign optimized"
1288 "\n");
1290 break;
1293 case dr_unaligned_unsupported:
1295 *inside_cost = VECT_MAX_COST;
1297 if (dump_enabled_p ())
1298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1299 "vect_model_load_cost: unsupported access.\n");
1300 break;
1303 default:
1304 gcc_unreachable ();
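/* An illustrative sketch (not from the original source): for
   dr_explicit_realign with ncopies == 2 on a target that provides
   builtin_mask_for_load, the loop body is charged 4 vector_load, 2
   vec_perm and 1 vector_stmt entries; in the dr_explicit_realign_optimized
   case the address, initial load and mask setup are instead charged to the
   prologue when ADD_REALIGN_COST is set.  */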
1308 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1309 the loop preheader for the vectorized stmt STMT_VINFO. */
1311 static void
1312 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1313 gimple_stmt_iterator *gsi)
1315 if (gsi)
1316 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1317 else
1318 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "created new init_stmt: %G", new_stmt);
1325 /* Function vect_init_vector.
1327 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1328 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1329 vector type a vector with all elements equal to VAL is created first.
1330 Place the initialization at GSI if it is not NULL. Otherwise, place the
1331 initialization at the loop preheader.
1332 Return the DEF of INIT_STMT.
1333 It will be used in the vectorization of STMT_INFO. */
1335 tree
1336 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1337 gimple_stmt_iterator *gsi)
1339 gimple *init_stmt;
1340 tree new_temp;
1342 /* We abuse this function to push something to an SSA name with initial 'val'. */
1343 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1345 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1346 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1348 /* Scalar boolean value should be transformed into
1349 all zeros or all ones value before building a vector. */
1350 if (VECTOR_BOOLEAN_TYPE_P (type))
1352 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1353 tree false_val = build_zero_cst (TREE_TYPE (type));
1355 if (CONSTANT_CLASS_P (val))
1356 val = integer_zerop (val) ? false_val : true_val;
1357 else
1359 new_temp = make_ssa_name (TREE_TYPE (type));
1360 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1361 val, true_val, false_val);
1362 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1363 val = new_temp;
1366 else
1368 gimple_seq stmts = NULL;
1369 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1370 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1371 TREE_TYPE (type), val);
1372 else
1373 /* ??? Condition vectorization expects us to do
1374 promotion of invariant/external defs. */
1375 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1376 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1377 !gsi_end_p (gsi2); )
1379 init_stmt = gsi_stmt (gsi2);
1380 gsi_remove (&gsi2, false);
1381 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1385 val = build_vector_from_val (type, val);
1388 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1389 init_stmt = gimple_build_assign (new_temp, val);
1390 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391 return new_temp;
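/* An illustrative sketch (not from the original source): called with
   VAL == 5 and TYPE a four-element integer vector type, this emits (in the
   loop preheader when GSI is NULL)

       cst_1 = { 5, 5, 5, 5 };

   and returns cst_1.  For a vector boolean TYPE, a non-constant scalar
   condition is first turned into an all-ones/all-zeros value with a
   COND_EXPR before being broadcast.  */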
1395 /* Function vect_get_vec_defs_for_operand.
1397 OP is an operand in STMT_VINFO. This function returns a vector of
1398 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1400 In the case that OP is an SSA_NAME which is defined in the loop, then
1401 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1403 In case OP is an invariant or constant, a new stmt that creates a vector def
1404 needs to be introduced. VECTYPE may be used to specify a required type for
1405 vector invariant. */
1407 void
1408 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1409 unsigned ncopies,
1410 tree op, vec<tree> *vec_oprnds, tree vectype)
1412 gimple *def_stmt;
1413 enum vect_def_type dt;
1414 bool is_simple_use;
1415 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1417 if (dump_enabled_p ())
1418 dump_printf_loc (MSG_NOTE, vect_location,
1419 "vect_get_vec_defs_for_operand: %T\n", op);
1421 stmt_vec_info def_stmt_info;
1422 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1423 &def_stmt_info, &def_stmt);
1424 gcc_assert (is_simple_use);
1425 if (def_stmt && dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1428 vec_oprnds->create (ncopies);
1429 if (dt == vect_constant_def || dt == vect_external_def)
1431 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1432 tree vector_type;
1434 if (vectype)
1435 vector_type = vectype;
1436 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1437 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1438 vector_type = truth_type_for (stmt_vectype);
1439 else
1440 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1442 gcc_assert (vector_type);
1443 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1444 while (ncopies--)
1445 vec_oprnds->quick_push (vop);
1447 else
1449 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1450 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1451 for (unsigned i = 0; i < ncopies; ++i)
1452 vec_oprnds->quick_push (gimple_get_lhs
1453 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
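/* An illustrative sketch (not from the original source): with NCOPIES == 2,
   an invariant OP such as the constant 3 produces a single preheader vector
   { 3, 3, ... } whose SSA name is pushed twice, whereas an OP defined by
   another vectorized statement in the loop yields the LHS of each of that
   definition's two vector statements.  */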
1458 /* Get vectorized definitions for OP0 and OP1. */
1460 void
1461 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1462 unsigned ncopies,
1463 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1464 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1465 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1466 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1468 if (slp_node)
1470 if (op0)
1471 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1472 if (op1)
1473 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1474 if (op2)
1475 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1476 if (op3)
1477 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1479 else
1481 if (op0)
1482 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1483 op0, vec_oprnds0, vectype0);
1484 if (op1)
1485 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1486 op1, vec_oprnds1, vectype1);
1487 if (op2)
1488 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1489 op2, vec_oprnds2, vectype2);
1490 if (op3)
1491 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1492 op3, vec_oprnds3, vectype3);
1496 void
1497 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1498 unsigned ncopies,
1499 tree op0, vec<tree> *vec_oprnds0,
1500 tree op1, vec<tree> *vec_oprnds1,
1501 tree op2, vec<tree> *vec_oprnds2,
1502 tree op3, vec<tree> *vec_oprnds3)
1504 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1505 op0, vec_oprnds0, NULL_TREE,
1506 op1, vec_oprnds1, NULL_TREE,
1507 op2, vec_oprnds2, NULL_TREE,
1508 op3, vec_oprnds3, NULL_TREE);
1511 /* Helper function called by vect_finish_replace_stmt and
1512 vect_finish_stmt_generation. Set the location of the new
1513 statement and create and return a stmt_vec_info for it. */
1515 static void
1516 vect_finish_stmt_generation_1 (vec_info *,
1517 stmt_vec_info stmt_info, gimple *vec_stmt)
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1522 if (stmt_info)
1524 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1526 /* While EH edges will generally prevent vectorization, stmt might
1527 e.g. be in a must-not-throw region. Ensure newly created stmts
1528 that could throw are part of the same region. */
1529 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1530 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1531 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1533 else
1534 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1537 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1538 which sets the same scalar result as STMT_INFO did. Create and return a
1539 stmt_vec_info for VEC_STMT. */
1541 void
1542 vect_finish_replace_stmt (vec_info *vinfo,
1543 stmt_vec_info stmt_info, gimple *vec_stmt)
1545 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1546 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1548 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1549 gsi_replace (&gsi, vec_stmt, true);
1551 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1554 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1555 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1557 void
1558 vect_finish_stmt_generation (vec_info *vinfo,
1559 stmt_vec_info stmt_info, gimple *vec_stmt,
1560 gimple_stmt_iterator *gsi)
1562 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1564 if (!gsi_end_p (*gsi)
1565 && gimple_has_mem_ops (vec_stmt))
1567 gimple *at_stmt = gsi_stmt (*gsi);
1568 tree vuse = gimple_vuse (at_stmt);
1569 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1571 tree vdef = gimple_vdef (at_stmt);
1572 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1573 gimple_set_modified (vec_stmt, true);
1574 /* If we have an SSA vuse and insert a store, update virtual
1575 SSA form to avoid triggering the renamer. Do so only
1576 if we can easily see all uses - which is what almost always
1577 happens with the way vectorized stmts are inserted. */
1578 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1579 && ((is_gimple_assign (vec_stmt)
1580 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1581 || (is_gimple_call (vec_stmt)
1582 && !(gimple_call_flags (vec_stmt)
1583 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1585 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1586 gimple_set_vdef (vec_stmt, new_vdef);
1587 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1591 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1592 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1595 /* We want to vectorize a call to combined function CFN with function
1596 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1597 as the types of all inputs. Check whether this is possible using
1598 an internal function, returning its code if so or IFN_LAST if not. */
1600 static internal_fn
1601 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1602 tree vectype_out, tree vectype_in)
1604 internal_fn ifn;
1605 if (internal_fn_p (cfn))
1606 ifn = as_internal_fn (cfn);
1607 else
1608 ifn = associated_internal_fn (fndecl);
1609 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1611 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1612 if (info.vectorizable)
1614 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1615 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1616 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1617 OPTIMIZE_FOR_SPEED))
1618 return ifn;
1621 return IFN_LAST;
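/* An illustrative sketch (not from the original source): for CFN_SQRT with
   VECTYPE_OUT == VECTYPE_IN == a supported double vector type, the function
   returns IFN_SQRT when direct_internal_fn_supported_p reports the
   corresponding optab for that mode; otherwise IFN_LAST is returned and the
   caller must handle the call some other way.  */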
1625 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1626 gimple_stmt_iterator *);
1628 /* Check whether a load or store statement in the loop described by
1629 LOOP_VINFO is possible in a loop using partial vectors. This is
1630 testing whether the vectorizer pass has the appropriate support,
1631 as well as whether the target does.
1633 VLS_TYPE says whether the statement is a load or store and VECTYPE
1634 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1635 says how the load or store is going to be implemented and GROUP_SIZE
1636 is the number of load or store statements in the containing group.
1637 If the access is a gather load or scatter store, GS_INFO describes
1638 its arguments. If the load or store is conditional, SCALAR_MASK is the
1639 condition under which it occurs.
1641 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1642 vectors is not supported, otherwise record the required rgroup control
1643 types. */
1645 static void
1646 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1647 vec_load_store_type vls_type,
1648 int group_size,
1649 vect_memory_access_type
1650 memory_access_type,
1651 gather_scatter_info *gs_info,
1652 tree scalar_mask)
1654 /* Invariant loads need no special support. */
1655 if (memory_access_type == VMAT_INVARIANT)
1656 return;
1658 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1659 machine_mode vecmode = TYPE_MODE (vectype);
1660 bool is_load = (vls_type == VLS_LOAD);
1661 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1663 if (is_load
1664 ? !vect_load_lanes_supported (vectype, group_size, true)
1665 : !vect_store_lanes_supported (vectype, group_size, true))
1667 if (dump_enabled_p ())
1668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1669 "can't operate on partial vectors because"
1670 " the target doesn't have an appropriate"
1671 " load/store-lanes instruction.\n");
1672 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1673 return;
1675 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1676 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1677 return;
1680 if (memory_access_type == VMAT_GATHER_SCATTER)
1682 internal_fn ifn = (is_load
1683 ? IFN_MASK_GATHER_LOAD
1684 : IFN_MASK_SCATTER_STORE);
1685 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1686 gs_info->memory_type,
1687 gs_info->offset_vectype,
1688 gs_info->scale))
1690 if (dump_enabled_p ())
1691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1692 "can't operate on partial vectors because"
1693 " the target doesn't have an appropriate"
1694 " gather load or scatter store instruction.\n");
1695 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1696 return;
1698 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1699 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1700 return;
1703 if (memory_access_type != VMAT_CONTIGUOUS
1704 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1706 /* Element X of the data must come from iteration i * VF + X of the
1707 scalar loop. We need more work to support other mappings. */
1708 if (dump_enabled_p ())
1709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1710 "can't operate on partial vectors because an"
1711 " access isn't contiguous.\n");
1712 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1713 return;
1716 if (!VECTOR_MODE_P (vecmode))
1718 if (dump_enabled_p ())
1719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1720 "can't operate on partial vectors when emulating"
1721 " vector operations.\n");
1722 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1723 return;
1726 /* We might load more scalars than we need for permuting SLP loads.
1727 We checked in get_group_load_store_type that the extra elements
1728 don't leak into a new vector. */
1729 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1731 unsigned int nvectors;
1732 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1733 return nvectors;
1734 gcc_unreachable ();
1737 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1738 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1739 machine_mode mask_mode;
1740 bool using_partial_vectors_p = false;
1741 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1742 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1744 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1745 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1746 using_partial_vectors_p = true;
1749 machine_mode vmode;
1750 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1752 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1753 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1754 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1755 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1756 using_partial_vectors_p = true;
1759 if (!using_partial_vectors_p)
1761 if (dump_enabled_p ())
1762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1763 "can't operate on partial vectors because the"
1764 " target doesn't have the appropriate partial"
1765 " vectorization load or store.\n");
1766 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
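/* Example for check_load_store_for_partial_vectors: in a fully-masked loop
   with V4SI accesses, ten scalar iterations become three vector iterations
   whose loop masks enable 4, 4 and 2 lanes respectively, so the final,
   partial vector access touches only the two remaining elements.  */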
1770 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1771 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1772 that needs to be applied to all loads and stores in a vectorized loop.
1773 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1775 MASK_TYPE is the type of both masks. If new statements are needed,
1776 insert them before GSI. */
1778 static tree
1779 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1780 gimple_stmt_iterator *gsi)
1782 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1783 if (!loop_mask)
1784 return vec_mask;
1786 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1787 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1788 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1789 vec_mask, loop_mask);
1790 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1791 return and_res;
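/* Example for prepare_load_store_mask (SSA names are illustrative): given
   VEC_MASK _5 (the vectorized condition) and LOOP_MASK _6 (the
   partial-vector mask), the function emits

     vec_mask_and_7 = _5 & _6;

   before GSI and returns vec_mask_and_7; with a null LOOP_MASK it simply
   returns _5.  */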
1794 /* Determine whether we can use a gather load or scatter store to vectorize
1795 strided load or store STMT_INFO by truncating the current offset to a
1796 smaller width. We need to be able to construct an offset vector:
1798 { 0, X, X*2, X*3, ... }
1800 without loss of precision, where X is STMT_INFO's DR_STEP.
1802 Return true if this is possible, describing the gather load or scatter
1803 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1805 static bool
1806 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1807 loop_vec_info loop_vinfo, bool masked_p,
1808 gather_scatter_info *gs_info)
1810 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1811 data_reference *dr = dr_info->dr;
1812 tree step = DR_STEP (dr);
1813 if (TREE_CODE (step) != INTEGER_CST)
1815 /* ??? Perhaps we could use range information here? */
1816 if (dump_enabled_p ())
1817 dump_printf_loc (MSG_NOTE, vect_location,
1818 "cannot truncate variable step.\n");
1819 return false;
1822 /* Get the number of bits in an element. */
1823 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1824 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1825 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1827 /* Set COUNT to the upper limit on the number of elements - 1.
1828 Start with the maximum vectorization factor. */
1829 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1831 /* Try lowering COUNT to the number of scalar latch iterations. */
1832 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1833 widest_int max_iters;
1834 if (max_loop_iterations (loop, &max_iters)
1835 && max_iters < count)
1836 count = max_iters.to_shwi ();
1838 /* Try scales of 1 and the element size. */
1839 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1840 wi::overflow_type overflow = wi::OVF_NONE;
1841 for (int i = 0; i < 2; ++i)
1843 int scale = scales[i];
1844 widest_int factor;
1845 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1846 continue;
1848 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1849 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1850 if (overflow)
1851 continue;
1852 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1853 unsigned int min_offset_bits = wi::min_precision (range, sign);
1855 /* Find the narrowest viable offset type. */
1856 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1857 tree offset_type = build_nonstandard_integer_type (offset_bits,
1858 sign == UNSIGNED);
1860 /* See whether the target supports the operation with an offset
1861 no narrower than OFFSET_TYPE. */
1862 tree memory_type = TREE_TYPE (DR_REF (dr));
1863 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1864 vectype, memory_type, offset_type, scale,
1865 &gs_info->ifn, &gs_info->offset_vectype))
1866 continue;
1868 gs_info->decl = NULL_TREE;
1869 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1870 but we don't need to store that here. */
1871 gs_info->base = NULL_TREE;
1872 gs_info->element_type = TREE_TYPE (vectype);
1873 gs_info->offset = fold_convert (offset_type, step);
1874 gs_info->offset_dt = vect_constant_def;
1875 gs_info->scale = scale;
1876 gs_info->memory_type = memory_type;
1877 return true;
1880 if (overflow && dump_enabled_p ())
1881 dump_printf_loc (MSG_NOTE, vect_location,
1882 "truncating gather/scatter offset to %d bits"
1883 " might change its value.\n", element_bits);
1885 return false;
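/* Example for vect_truncate_gather_scatter_offset: with DR_STEP 24, an
   8-byte element and at most 100 scalar latch iterations, trying scale 8
   gives the factor 3 and a worst-case offset of at most 100 * 3 = 300, so a
   16-bit unsigned offset type suffices and the offsets used are
   { 0, 3, 6, ... }, scaled by 8 at access time.  */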
1888 /* Return true if we can use gather/scatter internal functions to
1889 vectorize STMT_INFO, which is a grouped or strided load or store.
1890 MASKED_P is true if load or store is conditional. When returning
1891 true, fill in GS_INFO with the information required to perform the
1892 operation. */
1894 static bool
1895 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1896 loop_vec_info loop_vinfo, bool masked_p,
1897 gather_scatter_info *gs_info)
1899 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1900 || gs_info->decl)
1901 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1902 masked_p, gs_info);
1904 tree old_offset_type = TREE_TYPE (gs_info->offset);
1905 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1907 gcc_assert (TYPE_PRECISION (new_offset_type)
1908 >= TYPE_PRECISION (old_offset_type));
1909 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1911 if (dump_enabled_p ())
1912 dump_printf_loc (MSG_NOTE, vect_location,
1913 "using gather/scatter for strided/grouped access,"
1914 " scale = %d\n", gs_info->scale);
1916 return true;
1919 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1920 elements with a known constant step. Return -1 if that step
1921 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1923 static int
1924 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1926 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1927 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1928 size_zero_node);
1931 /* If the target supports a permute mask that reverses the elements in
1932 a vector of type VECTYPE, return that mask, otherwise return null. */
1934 static tree
1935 perm_mask_for_reverse (tree vectype)
1937 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1939 /* The encoding has a single stepped pattern. */
1940 vec_perm_builder sel (nunits, 1, 3);
1941 for (int i = 0; i < 3; ++i)
1942 sel.quick_push (nunits - 1 - i);
1944 vec_perm_indices indices (sel, 1, nunits);
1945 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1946 return NULL_TREE;
1947 return vect_gen_perm_mask_checked (vectype, indices);
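/* Example for perm_mask_for_reverse: for V8SI the selector is
   { 7, 6, 5, 4, 3, 2, 1, 0 }, encoded with the single stepped pattern
   7, 6, 5, ... so that the same encoding also works for variable-length
   vectors.  */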
1950 /* A subroutine of get_load_store_type, with a subset of the same
1951 arguments. Handle the case where STMT_INFO is a load or store that
1952 accesses consecutive elements with a negative step. */
1954 static vect_memory_access_type
1955 get_negative_load_store_type (vec_info *vinfo,
1956 stmt_vec_info stmt_info, tree vectype,
1957 vec_load_store_type vls_type,
1958 unsigned int ncopies)
1960 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1961 dr_alignment_support alignment_support_scheme;
1963 if (ncopies > 1)
1965 if (dump_enabled_p ())
1966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1967 "multiple types with negative step.\n");
1968 return VMAT_ELEMENTWISE;
1971 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1972 dr_info, false);
1973 if (alignment_support_scheme != dr_aligned
1974 && alignment_support_scheme != dr_unaligned_supported)
1976 if (dump_enabled_p ())
1977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1978 "negative step but alignment required.\n");
1979 return VMAT_ELEMENTWISE;
1982 if (vls_type == VLS_STORE_INVARIANT)
1984 if (dump_enabled_p ())
1985 dump_printf_loc (MSG_NOTE, vect_location,
1986 "negative step with invariant source;"
1987 " no permute needed.\n");
1988 return VMAT_CONTIGUOUS_DOWN;
1991 if (!perm_mask_for_reverse (vectype))
1993 if (dump_enabled_p ())
1994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1995 "negative step and reversing not supported.\n");
1996 return VMAT_ELEMENTWISE;
1999 return VMAT_CONTIGUOUS_REVERSE;
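/* Example for get_negative_load_store_type: a loop such as

     for (int i = n - 1; i >= 0; --i)
       a[i] = b[i] + 1;

   accesses consecutive elements with a negative step, so both data
   references can be handled as VMAT_CONTIGUOUS_REVERSE: each vector is
   loaded/stored as a contiguous block and its elements are reversed with
   the permute mask from perm_mask_for_reverse, provided the target supports
   that permutation and the required (mis)alignment.  */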
2002 /* STMT_INFO is either a masked or unconditional store. Return the value
2003 being stored. */
2005 tree
2006 vect_get_store_rhs (stmt_vec_info stmt_info)
2008 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2010 gcc_assert (gimple_assign_single_p (assign));
2011 return gimple_assign_rhs1 (assign);
2013 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2015 internal_fn ifn = gimple_call_internal_fn (call);
2016 int index = internal_fn_stored_value_index (ifn);
2017 gcc_assert (index >= 0);
2018 return gimple_call_arg (call, index);
2020 gcc_unreachable ();
2023 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2025 This function returns a vector type that can be composed of NELTS pieces,
2026 whose type is recorded in PTYPE. VTYPE should be a vector type and has the
2027 same vector size as the return vector. It first checks whether the target
2028 supports a pieces-sized vector mode for the construction; if not, it checks
2029 a pieces-sized scalar mode instead. It returns NULL_TREE if no suitable
2030 composition can be found.
2032 For example, for (vtype=V16QI, nelts=4), we can probably get:
2033 - V16QI with PTYPE V4QI.
2034 - V4SI with PTYPE SI.
2035 - NULL_TREE. */
2037 static tree
2038 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2040 gcc_assert (VECTOR_TYPE_P (vtype));
2041 gcc_assert (known_gt (nelts, 0U));
2043 machine_mode vmode = TYPE_MODE (vtype);
2044 if (!VECTOR_MODE_P (vmode))
2045 return NULL_TREE;
2047 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2048 unsigned int pbsize;
2049 if (constant_multiple_p (vbsize, nelts, &pbsize))
2051 /* First check if vec_init optab supports construction from
2052 vector pieces directly. */
2053 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2054 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2055 machine_mode rmode;
2056 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2057 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2058 != CODE_FOR_nothing))
2060 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2061 return vtype;
2064 /* Otherwise check whether an integer type of the same piece size exists and
2065 whether the vec_init optab supports construction from it directly. */
2066 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2067 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2068 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2069 != CODE_FOR_nothing))
2071 *ptype = build_nonstandard_integer_type (pbsize, 1);
2072 return build_vector_type (*ptype, nelts);
2076 return NULL_TREE;
2079 /* A subroutine of get_load_store_type, with a subset of the same
2080 arguments. Handle the case where STMT_INFO is part of a grouped load
2081 or store.
2083 For stores, the statements in the group are all consecutive
2084 and there is no gap at the end. For loads, the statements in the
2085 group might not be consecutive; there can be gaps between statements
2086 as well as at the end. */
2088 static bool
2089 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2090 tree vectype, slp_tree slp_node,
2091 bool masked_p, vec_load_store_type vls_type,
2092 vect_memory_access_type *memory_access_type,
2093 dr_alignment_support *alignment_support_scheme,
2094 gather_scatter_info *gs_info)
2096 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2097 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2098 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2099 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2100 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2101 bool single_element_p = (stmt_info == first_stmt_info
2102 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2103 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2104 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2106 /* True if the vectorized statements would access beyond the last
2107 statement in the group. */
2108 bool overrun_p = false;
2110 /* True if we can cope with such overrun by peeling for gaps, so that
2111 there is at least one final scalar iteration after the vector loop. */
2112 bool can_overrun_p = (!masked_p
2113 && vls_type == VLS_LOAD
2114 && loop_vinfo
2115 && !loop->inner);
2117 /* There can only be a gap at the end of the group if the stride is
2118 known at compile time. */
2119 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2121 /* Stores can't yet have gaps. */
2122 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2124 if (slp_node)
2126 /* For SLP vectorization we directly vectorize a subchain
2127 without permutation. */
2128 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2129 first_dr_info
2130 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2131 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2133 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2134 separated by the stride, until we have a complete vector.
2135 Fall back to scalar accesses if that isn't possible. */
2136 if (multiple_p (nunits, group_size))
2137 *memory_access_type = VMAT_STRIDED_SLP;
2138 else
2139 *memory_access_type = VMAT_ELEMENTWISE;
2141 else
2143 overrun_p = loop_vinfo && gap != 0;
2144 if (overrun_p && vls_type != VLS_LOAD)
2146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2147 "Grouped store with gaps requires"
2148 " non-consecutive accesses\n");
2149 return false;
2151 /* An overrun is fine if the trailing elements are smaller
2152 than the alignment boundary B. Every vector access will
2153 be a multiple of B and so we are guaranteed to access a
2154 non-gap element in the same B-sized block. */
2155 if (overrun_p
2156 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2157 / vect_get_scalar_dr_size (first_dr_info)))
2158 overrun_p = false;
2160 /* If the gap splits the vector in half and the target
2161 can do half-vector operations avoid the epilogue peeling
2162 by simply loading half of the vector only. Usually
2163 the construction with an upper zero half will be elided. */
2164 dr_alignment_support alignment_support_scheme;
2165 tree half_vtype;
2166 if (overrun_p
2167 && !masked_p
2168 && (((alignment_support_scheme
2169 = vect_supportable_dr_alignment (vinfo,
2170 first_dr_info, false)))
2171 == dr_aligned
2172 || alignment_support_scheme == dr_unaligned_supported)
2173 && known_eq (nunits, (group_size - gap) * 2)
2174 && known_eq (nunits, group_size)
2175 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2176 != NULL_TREE))
2177 overrun_p = false;
2179 if (overrun_p && !can_overrun_p)
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "Peeling for outer loop is not supported\n");
2184 return false;
2186 int cmp = compare_step_with_zero (vinfo, stmt_info);
2187 if (cmp < 0)
2189 if (single_element_p)
2190 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2191 only correct for single element "interleaving" SLP. */
2192 *memory_access_type = get_negative_load_store_type
2193 (vinfo, stmt_info, vectype, vls_type, 1);
2194 else
2196 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2197 separated by the stride, until we have a complete vector.
2198 Fall back to scalar accesses if that isn't possible. */
2199 if (multiple_p (nunits, group_size))
2200 *memory_access_type = VMAT_STRIDED_SLP;
2201 else
2202 *memory_access_type = VMAT_ELEMENTWISE;
2205 else
2207 gcc_assert (!loop_vinfo || cmp > 0);
2208 *memory_access_type = VMAT_CONTIGUOUS;
2212 else
2214 /* We can always handle this case using elementwise accesses,
2215 but see if something more efficient is available. */
2216 *memory_access_type = VMAT_ELEMENTWISE;
2218 /* If there is a gap at the end of the group then these optimizations
2219 would access excess elements in the last iteration. */
2220 bool would_overrun_p = (gap != 0);
2221 /* An overrun is fine if the trailing elements are smaller than the
2222 alignment boundary B. Every vector access will be a multiple of B
2223 and so we are guaranteed to access a non-gap element in the
2224 same B-sized block. */
2225 if (would_overrun_p
2226 && !masked_p
2227 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2228 / vect_get_scalar_dr_size (first_dr_info)))
2229 would_overrun_p = false;
2231 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2232 && (can_overrun_p || !would_overrun_p)
2233 && compare_step_with_zero (vinfo, stmt_info) > 0)
2235 /* First cope with the degenerate case of a single-element
2236 vector. */
2237 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2238 *memory_access_type = VMAT_CONTIGUOUS;
2240 /* Otherwise try using LOAD/STORE_LANES. */
2241 if (*memory_access_type == VMAT_ELEMENTWISE
2242 && (vls_type == VLS_LOAD
2243 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2244 : vect_store_lanes_supported (vectype, group_size,
2245 masked_p)))
2247 *memory_access_type = VMAT_LOAD_STORE_LANES;
2248 overrun_p = would_overrun_p;
2251 /* If that fails, try using permuting loads. */
2252 if (*memory_access_type == VMAT_ELEMENTWISE
2253 && (vls_type == VLS_LOAD
2254 ? vect_grouped_load_supported (vectype, single_element_p,
2255 group_size)
2256 : vect_grouped_store_supported (vectype, group_size)))
2258 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2259 overrun_p = would_overrun_p;
2263 /* As a last resort, try using a gather load or scatter store.
2265 ??? Although the code can handle all group sizes correctly,
2266 it probably isn't a win to use separate strided accesses based
2267 on nearby locations. Or, even if it's a win over scalar code,
2268 it might not be a win over vectorizing at a lower VF, if that
2269 allows us to use contiguous accesses. */
2270 if (*memory_access_type == VMAT_ELEMENTWISE
2271 && single_element_p
2272 && loop_vinfo
2273 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2274 masked_p, gs_info))
2275 *memory_access_type = VMAT_GATHER_SCATTER;
2278 if (*memory_access_type == VMAT_GATHER_SCATTER
2279 || *memory_access_type == VMAT_ELEMENTWISE)
2280 *alignment_support_scheme = dr_unaligned_supported;
2281 else
2282 *alignment_support_scheme
2283 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
2285 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2287 /* STMT is the leader of the group. Check the operands of all the
2288 stmts of the group. */
2289 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2290 while (next_stmt_info)
2292 tree op = vect_get_store_rhs (next_stmt_info);
2293 enum vect_def_type dt;
2294 if (!vect_is_simple_use (op, vinfo, &dt))
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2298 "use not simple.\n");
2299 return false;
2301 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2305 if (overrun_p)
2307 gcc_assert (can_overrun_p);
2308 if (dump_enabled_p ())
2309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2310 "Data access with gaps requires scalar "
2311 "epilogue loop\n");
2312 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2315 return true;
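/* Example for get_group_load_store_type: loads of a[2*i] and a[2*i+1] in
   the same loop form a group of size 2 with no gap and can typically use
   VMAT_LOAD_STORE_LANES (e.g. an ld2-style instruction) or
   VMAT_CONTIGUOUS_PERMUTE.  If only a[2*i] is used, the group has a gap at
   the end, so the vectorized loads may read past the last scalar element
   and peeling for gaps (a scalar epilogue iteration) may be required.  */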
2318 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2319 if there is a memory access type that the vectorized form can use,
2320 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2321 or scatters, fill in GS_INFO accordingly. In addition
2322 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2323 the target does not support the alignment scheme.
2325 SLP says whether we're performing SLP rather than loop vectorization.
2326 MASKED_P is true if the statement is conditional on a vectorized mask.
2327 VECTYPE is the vector type that the vectorized statements will use.
2328 NCOPIES is the number of vector statements that will be needed. */
2330 static bool
2331 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2332 tree vectype, slp_tree slp_node,
2333 bool masked_p, vec_load_store_type vls_type,
2334 unsigned int ncopies,
2335 vect_memory_access_type *memory_access_type,
2336 dr_alignment_support *alignment_support_scheme,
2337 gather_scatter_info *gs_info)
2339 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2340 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2341 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2343 *memory_access_type = VMAT_GATHER_SCATTER;
2344 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2345 gcc_unreachable ();
2346 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2347 &gs_info->offset_dt,
2348 &gs_info->offset_vectype))
2350 if (dump_enabled_p ())
2351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2352 "%s index use not simple.\n",
2353 vls_type == VLS_LOAD ? "gather" : "scatter");
2354 return false;
2356 /* Gather-scatter accesses perform only component accesses; alignment
2357 is irrelevant for them. */
2358 *alignment_support_scheme = dr_unaligned_supported;
2360 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2362 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2363 masked_p,
2364 vls_type, memory_access_type,
2365 alignment_support_scheme, gs_info))
2366 return false;
2368 else if (STMT_VINFO_STRIDED_P (stmt_info))
2370 gcc_assert (!slp_node);
2371 if (loop_vinfo
2372 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2373 masked_p, gs_info))
2374 *memory_access_type = VMAT_GATHER_SCATTER;
2375 else
2376 *memory_access_type = VMAT_ELEMENTWISE;
2377 /* Alignment is irrelevant here. */
2378 *alignment_support_scheme = dr_unaligned_supported;
2380 else
2382 int cmp = compare_step_with_zero (vinfo, stmt_info);
2383 if (cmp < 0)
2384 *memory_access_type = get_negative_load_store_type
2385 (vinfo, stmt_info, vectype, vls_type, ncopies);
2386 else if (cmp == 0)
2388 gcc_assert (vls_type == VLS_LOAD);
2389 *memory_access_type = VMAT_INVARIANT;
2391 else
2392 *memory_access_type = VMAT_CONTIGUOUS;
2393 *alignment_support_scheme
2394 = vect_supportable_dr_alignment (vinfo,
2395 STMT_VINFO_DR_INFO (stmt_info), false);
2398 if ((*memory_access_type == VMAT_ELEMENTWISE
2399 || *memory_access_type == VMAT_STRIDED_SLP)
2400 && !nunits.is_constant ())
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2404 "Not using elementwise accesses due to variable "
2405 "vectorization factor.\n");
2406 return false;
2409 if (*alignment_support_scheme == dr_unaligned_unsupported)
2411 if (dump_enabled_p ())
2412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2413 "unsupported unaligned access\n");
2414 return false;
2417 /* FIXME: At the moment the cost model seems to underestimate the
2418 cost of using elementwise accesses. This check preserves the
2419 traditional behavior until that can be fixed. */
2420 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2421 if (!first_stmt_info)
2422 first_stmt_info = stmt_info;
2423 if (*memory_access_type == VMAT_ELEMENTWISE
2424 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2425 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2426 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2427 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2429 if (dump_enabled_p ())
2430 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2431 "not falling back to elementwise accesses\n");
2432 return false;
2434 return true;
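/* Example classifications made by get_load_store_type: a[i] with a unit
   step is VMAT_CONTIGUOUS; a load whose step is zero (the same element on
   every iteration) is VMAT_INVARIANT; an access such as a[i*stride] with a
   runtime stride is VMAT_ELEMENTWISE unless a strided gather/scatter can be
   used; an explicit gather/scatter access is VMAT_GATHER_SCATTER.  */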
2437 /* Return true if boolean argument MASK is suitable for vectorizing
2438 conditional operation STMT_INFO. When returning true, store the type
2439 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2440 in *MASK_VECTYPE_OUT. */
2442 static bool
2443 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2444 vect_def_type *mask_dt_out,
2445 tree *mask_vectype_out)
2447 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2449 if (dump_enabled_p ())
2450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2451 "mask argument is not a boolean.\n");
2452 return false;
2455 if (TREE_CODE (mask) != SSA_NAME)
2457 if (dump_enabled_p ())
2458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2459 "mask argument is not an SSA name.\n");
2460 return false;
2463 enum vect_def_type mask_dt;
2464 tree mask_vectype;
2465 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2467 if (dump_enabled_p ())
2468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2469 "mask use not simple.\n");
2470 return false;
2473 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2474 if (!mask_vectype)
2475 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2477 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2479 if (dump_enabled_p ())
2480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2481 "could not find an appropriate vector mask type.\n");
2482 return false;
2485 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2486 TYPE_VECTOR_SUBPARTS (vectype)))
2488 if (dump_enabled_p ())
2489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2490 "vector mask type %T"
2491 " does not match vector data type %T.\n",
2492 mask_vectype, vectype);
2494 return false;
2497 *mask_dt_out = mask_dt;
2498 *mask_vectype_out = mask_vectype;
2499 return true;
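/* Example for vect_check_scalar_mask: for a masked store generated from

     if (c[i] != 0)
       a[i] = b[i];

   the scalar mask is the boolean c[i] != 0; with V4SI data the chosen mask
   vector type must also have four elements (e.g. a 4-lane boolean vector),
   otherwise the statement is rejected.  */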
2502 /* Return true if stored value RHS is suitable for vectorizing store
2503 statement STMT_INFO. When returning true, store the type of the
2504 definition in *RHS_DT_OUT, the type of the vectorized store value in
2505 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2507 static bool
2508 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2509 slp_tree slp_node, tree rhs,
2510 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2511 vec_load_store_type *vls_type_out)
2513 /* If this is a store from a constant, make sure
2514 native_encode_expr can handle it. */
2515 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "cannot encode constant as a byte sequence.\n");
2520 return false;
2523 enum vect_def_type rhs_dt;
2524 tree rhs_vectype;
2525 slp_tree slp_op;
2526 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
2527 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2529 if (dump_enabled_p ())
2530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2531 "use not simple.\n");
2532 return false;
2535 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2536 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2538 if (dump_enabled_p ())
2539 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2540 "incompatible vector types.\n");
2541 return false;
2544 *rhs_dt_out = rhs_dt;
2545 *rhs_vectype_out = rhs_vectype;
2546 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2547 *vls_type_out = VLS_STORE_INVARIANT;
2548 else
2549 *vls_type_out = VLS_STORE;
2550 return true;
2553 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2554 Note that we support masks with floating-point type, in which case the
2555 floats are interpreted as a bitmask. */
2557 static tree
2558 vect_build_all_ones_mask (vec_info *vinfo,
2559 stmt_vec_info stmt_info, tree masktype)
2561 if (TREE_CODE (masktype) == INTEGER_TYPE)
2562 return build_int_cst (masktype, -1);
2563 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2565 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2566 mask = build_vector_from_val (masktype, mask);
2567 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2569 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2571 REAL_VALUE_TYPE r;
2572 long tmp[6];
2573 for (int j = 0; j < 6; ++j)
2574 tmp[j] = -1;
2575 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2576 tree mask = build_real (TREE_TYPE (masktype), r);
2577 mask = build_vector_from_val (masktype, mask);
2578 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2580 gcc_unreachable ();
2583 /* Build an all-zero merge value of type VECTYPE while vectorizing
2584 STMT_INFO as a gather load. */
2586 static tree
2587 vect_build_zero_merge_argument (vec_info *vinfo,
2588 stmt_vec_info stmt_info, tree vectype)
2590 tree merge;
2591 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2592 merge = build_int_cst (TREE_TYPE (vectype), 0);
2593 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2595 REAL_VALUE_TYPE r;
2596 long tmp[6];
2597 for (int j = 0; j < 6; ++j)
2598 tmp[j] = 0;
2599 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2600 merge = build_real (TREE_TYPE (vectype), r);
2602 else
2603 gcc_unreachable ();
2604 merge = build_vector_from_val (vectype, merge);
2605 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2608 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2609 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2610 the gather load operation. If the load is conditional, MASK is the
2611 unvectorized condition and MASK_DT is its definition type, otherwise
2612 MASK is null. */
2614 static void
2615 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2616 gimple_stmt_iterator *gsi,
2617 gimple **vec_stmt,
2618 gather_scatter_info *gs_info,
2619 tree mask)
2621 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2622 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2623 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2624 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2625 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2626 edge pe = loop_preheader_edge (loop);
2627 enum { NARROW, NONE, WIDEN } modifier;
2628 poly_uint64 gather_off_nunits
2629 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2631 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2632 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2633 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2634 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2635 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2636 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2637 tree scaletype = TREE_VALUE (arglist);
2638 tree real_masktype = masktype;
2639 gcc_checking_assert (types_compatible_p (srctype, rettype)
2640 && (!mask
2641 || TREE_CODE (masktype) == INTEGER_TYPE
2642 || types_compatible_p (srctype, masktype)));
2643 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2644 masktype = truth_type_for (srctype);
2646 tree mask_halftype = masktype;
2647 tree perm_mask = NULL_TREE;
2648 tree mask_perm_mask = NULL_TREE;
2649 if (known_eq (nunits, gather_off_nunits))
2650 modifier = NONE;
2651 else if (known_eq (nunits * 2, gather_off_nunits))
2653 modifier = WIDEN;
2655 /* Currently widening gathers and scatters are only supported for
2656 fixed-length vectors. */
2657 int count = gather_off_nunits.to_constant ();
2658 vec_perm_builder sel (count, count, 1);
2659 for (int i = 0; i < count; ++i)
2660 sel.quick_push (i | (count / 2));
2662 vec_perm_indices indices (sel, 1, count);
2663 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2664 indices);
2666 else if (known_eq (nunits, gather_off_nunits * 2))
2668 modifier = NARROW;
2670 /* Currently narrowing gathers and scatters are only supported for
2671 fixed-length vectors. */
2672 int count = nunits.to_constant ();
2673 vec_perm_builder sel (count, count, 1);
2674 sel.quick_grow (count);
2675 for (int i = 0; i < count; ++i)
2676 sel[i] = i < count / 2 ? i : i + count / 2;
2677 vec_perm_indices indices (sel, 2, count);
2678 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2680 ncopies *= 2;
2682 if (mask && masktype == real_masktype)
2684 for (int i = 0; i < count; ++i)
2685 sel[i] = i | (count / 2);
2686 indices.new_vector (sel, 2, count);
2687 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2689 else if (mask)
2690 mask_halftype = truth_type_for (gs_info->offset_vectype);
2692 else
2693 gcc_unreachable ();
2695 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2696 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2698 tree ptr = fold_convert (ptrtype, gs_info->base);
2699 if (!is_gimple_min_invariant (ptr))
2701 gimple_seq seq;
2702 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2703 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2704 gcc_assert (!new_bb);
2707 tree scale = build_int_cst (scaletype, gs_info->scale);
2709 tree vec_oprnd0 = NULL_TREE;
2710 tree vec_mask = NULL_TREE;
2711 tree src_op = NULL_TREE;
2712 tree mask_op = NULL_TREE;
2713 tree prev_res = NULL_TREE;
2715 if (!mask)
2717 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2718 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2721 auto_vec<tree> vec_oprnds0;
2722 auto_vec<tree> vec_masks;
2723 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2724 modifier == WIDEN ? ncopies / 2 : ncopies,
2725 gs_info->offset, &vec_oprnds0);
2726 if (mask)
2727 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2728 modifier == NARROW ? ncopies / 2 : ncopies,
2729 mask, &vec_masks);
2730 for (int j = 0; j < ncopies; ++j)
2732 tree op, var;
2733 if (modifier == WIDEN && (j & 1))
2734 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2735 perm_mask, stmt_info, gsi);
2736 else
2737 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2739 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2741 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2742 TYPE_VECTOR_SUBPARTS (idxtype)));
2743 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2744 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2745 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2746 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2747 op = var;
2750 if (mask)
2752 if (mask_perm_mask && (j & 1))
2753 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2754 mask_perm_mask, stmt_info, gsi);
2755 else
2757 if (modifier == NARROW)
2759 if ((j & 1) == 0)
2760 vec_mask = vec_masks[j / 2];
2762 else
2763 vec_mask = vec_masks[j];
2765 mask_op = vec_mask;
2766 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2768 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2769 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2770 gcc_assert (known_eq (sub1, sub2));
2771 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2772 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2773 gassign *new_stmt
2774 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2775 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2776 mask_op = var;
2779 if (modifier == NARROW && masktype != real_masktype)
2781 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2782 gassign *new_stmt
2783 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2784 : VEC_UNPACK_LO_EXPR,
2785 mask_op);
2786 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2787 mask_op = var;
2789 src_op = mask_op;
2792 tree mask_arg = mask_op;
2793 if (masktype != real_masktype)
2795 tree utype, optype = TREE_TYPE (mask_op);
2796 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2797 utype = real_masktype;
2798 else
2799 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2800 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2801 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2802 gassign *new_stmt
2803 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2804 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2805 mask_arg = var;
2806 if (!useless_type_conversion_p (real_masktype, utype))
2808 gcc_assert (TYPE_PRECISION (utype)
2809 <= TYPE_PRECISION (real_masktype));
2810 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2811 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2812 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2813 mask_arg = var;
2815 src_op = build_zero_cst (srctype);
2817 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2818 mask_arg, scale);
2820 if (!useless_type_conversion_p (vectype, rettype))
2822 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2823 TYPE_VECTOR_SUBPARTS (rettype)));
2824 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2825 gimple_call_set_lhs (new_stmt, op);
2826 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2827 var = make_ssa_name (vec_dest);
2828 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2829 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2830 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2832 else
2834 var = make_ssa_name (vec_dest, new_stmt);
2835 gimple_call_set_lhs (new_stmt, var);
2836 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2839 if (modifier == NARROW)
2841 if ((j & 1) == 0)
2843 prev_res = var;
2844 continue;
2846 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2847 stmt_info, gsi);
2848 new_stmt = SSA_NAME_DEF_STMT (var);
2851 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2853 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2856 /* Prepare the base and offset in GS_INFO for vectorization.
2857 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2858 to the vectorized offset argument for the first copy of STMT_INFO.
2859 STMT_INFO is the statement described by GS_INFO and LOOP is the
2860 containing loop. */
2862 static void
2863 vect_get_gather_scatter_ops (vec_info *vinfo,
2864 class loop *loop, stmt_vec_info stmt_info,
2865 gather_scatter_info *gs_info,
2866 tree *dataref_ptr, vec<tree> *vec_offset,
2867 unsigned ncopies)
2869 gimple_seq stmts = NULL;
2870 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2871 if (stmts != NULL)
2873 basic_block new_bb;
2874 edge pe = loop_preheader_edge (loop);
2875 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2876 gcc_assert (!new_bb);
2878 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
2879 vec_offset, gs_info->offset_vectype);
2882 /* Prepare to implement a grouped or strided load or store using
2883 the gather load or scatter store operation described by GS_INFO.
2884 STMT_INFO is the load or store statement.
2886 Set *DATAREF_BUMP to the amount that should be added to the base
2887 address after each copy of the vectorized statement. Set *VEC_OFFSET
2888 to an invariant offset vector in which element I has the value
2889 I * DR_STEP / SCALE. */
2891 static void
2892 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2893 loop_vec_info loop_vinfo,
2894 gather_scatter_info *gs_info,
2895 tree *dataref_bump, tree *vec_offset)
2897 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2898 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2900 tree bump = size_binop (MULT_EXPR,
2901 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2902 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2903 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2905 /* The offset given in GS_INFO can have pointer type, so use the element
2906 type of the vector instead. */
2907 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2909 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2910 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2911 ssize_int (gs_info->scale));
2912 step = fold_convert (offset_type, step);
2914 /* Create {0, X, X*2, X*3, ...}. */
2915 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2916 build_zero_cst (offset_type), step);
2917 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
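/* Example for vect_get_strided_load_store_ops: with a V4SI vector type,
   DR_STEP 40 and scale 4, *DATAREF_BUMP is 40 * 4 = 160 bytes per copy and
   *VEC_OFFSET is the invariant series { 0, 10, 20, 30 }, which the gather
   or scatter then multiplies by the scale of 4.  */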
2920 /* Return the amount that should be added to a vector pointer to move
2921 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2922 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2923 vectorization. */
2925 static tree
2926 vect_get_data_ptr_increment (vec_info *vinfo,
2927 dr_vec_info *dr_info, tree aggr_type,
2928 vect_memory_access_type memory_access_type)
2930 if (memory_access_type == VMAT_INVARIANT)
2931 return size_zero_node;
2933 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2934 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2935 if (tree_int_cst_sgn (step) == -1)
2936 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2937 return iv_step;
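/* Example for vect_get_data_ptr_increment: for a V8SI aggregate type the
   increment is 32 bytes, negated to -32 when the data reference has a
   negative step (VMAT_CONTIGUOUS_DOWN / VMAT_CONTIGUOUS_REVERSE), and 0 for
   VMAT_INVARIANT.  */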
2940 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2942 static bool
2943 vectorizable_bswap (vec_info *vinfo,
2944 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2945 gimple **vec_stmt, slp_tree slp_node,
2946 slp_tree *slp_op,
2947 tree vectype_in, stmt_vector_for_cost *cost_vec)
2949 tree op, vectype;
2950 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2951 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2952 unsigned ncopies;
2954 op = gimple_call_arg (stmt, 0);
2955 vectype = STMT_VINFO_VECTYPE (stmt_info);
2956 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2958 /* Multiple types in SLP are handled by creating the appropriate number of
2959 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2960 case of SLP. */
2961 if (slp_node)
2962 ncopies = 1;
2963 else
2964 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2966 gcc_assert (ncopies >= 1);
2968 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2969 if (! char_vectype)
2970 return false;
2972 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2973 unsigned word_bytes;
2974 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2975 return false;
2977 /* The encoding uses one stepped pattern for each byte in the word. */
2978 vec_perm_builder elts (num_bytes, word_bytes, 3);
2979 for (unsigned i = 0; i < 3; ++i)
2980 for (unsigned j = 0; j < word_bytes; ++j)
2981 elts.quick_push ((i + 1) * word_bytes - j - 1);
2983 vec_perm_indices indices (elts, 1, num_bytes);
2984 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2985 return false;
2987 if (! vec_stmt)
2989 if (slp_node
2990 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
2992 if (dump_enabled_p ())
2993 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2994 "incompatible vector types for invariants\n");
2995 return false;
2998 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2999 DUMP_VECT_SCOPE ("vectorizable_bswap");
3000 if (! slp_node)
3002 record_stmt_cost (cost_vec,
3003 1, vector_stmt, stmt_info, 0, vect_prologue);
3004 record_stmt_cost (cost_vec,
3005 ncopies, vec_perm, stmt_info, 0, vect_body);
3007 return true;
3010 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3012 /* Transform. */
3013 vec<tree> vec_oprnds = vNULL;
3014 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3015 op, &vec_oprnds);
3016 /* Arguments are ready. Create the new vector stmt. */
3017 unsigned i;
3018 tree vop;
3019 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3021 gimple *new_stmt;
3022 tree tem = make_ssa_name (char_vectype);
3023 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3024 char_vectype, vop));
3025 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3026 tree tem2 = make_ssa_name (char_vectype);
3027 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3028 tem, tem, bswap_vconst);
3029 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3030 tem = make_ssa_name (vectype);
3031 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3032 vectype, tem2));
3033 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3034 if (slp_node)
3035 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3036 else
3037 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3040 if (!slp_node)
3041 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3043 vec_oprnds.release ();
3044 return true;
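/* Example for vectorizable_bswap: __builtin_bswap32 on a V4SI vector is
   done by viewing the operand as V16QI and applying the byte permutation
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. reversing
   the bytes within each 4-byte word, then viewing the result as V4SI
   again.  */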
3047 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3048 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3049 in a single step. On success, store the binary pack code in
3050 *CONVERT_CODE. */
3052 static bool
3053 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3054 tree_code *convert_code)
3056 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3057 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3058 return false;
3060 tree_code code;
3061 int multi_step_cvt = 0;
3062 auto_vec <tree, 8> interm_types;
3063 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3064 &code, &multi_step_cvt, &interm_types)
3065 || multi_step_cvt)
3066 return false;
3068 *convert_code = code;
3069 return true;
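/* Example for simple_integer_narrowing: narrowing V4SI inputs to a V8HI
   result can usually be done in a single step with VEC_PACK_TRUNC_EXPR, so
   *CONVERT_CODE is set to that pack code; a narrowing that would need more
   than one pack step is rejected because MULTI_STEP_CVT is nonzero.  */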
3072 /* Function vectorizable_call.
3074 Check if STMT_INFO performs a function call that can be vectorized.
3075 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3076 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3077 Return true if STMT_INFO is vectorizable in this way. */
3079 static bool
3080 vectorizable_call (vec_info *vinfo,
3081 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3082 gimple **vec_stmt, slp_tree slp_node,
3083 stmt_vector_for_cost *cost_vec)
3085 gcall *stmt;
3086 tree vec_dest;
3087 tree scalar_dest;
3088 tree op;
3089 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3090 tree vectype_out, vectype_in;
3091 poly_uint64 nunits_in;
3092 poly_uint64 nunits_out;
3093 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3094 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3095 tree fndecl, new_temp, rhs_type;
3096 enum vect_def_type dt[4]
3097 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3098 vect_unknown_def_type };
3099 tree vectypes[ARRAY_SIZE (dt)] = {};
3100 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3101 int ndts = ARRAY_SIZE (dt);
3102 int ncopies, j;
3103 auto_vec<tree, 8> vargs;
3104 auto_vec<tree, 8> orig_vargs;
3105 enum { NARROW, NONE, WIDEN } modifier;
3106 size_t i, nargs;
3107 tree lhs;
3109 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3110 return false;
3112 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3113 && ! vec_stmt)
3114 return false;
3116 /* Is STMT_INFO a vectorizable call? */
3117 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3118 if (!stmt)
3119 return false;
3121 if (gimple_call_internal_p (stmt)
3122 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3123 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3124 /* Handled by vectorizable_load and vectorizable_store. */
3125 return false;
3127 if (gimple_call_lhs (stmt) == NULL_TREE
3128 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3129 return false;
3131 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3133 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3135 /* Process function arguments. */
3136 rhs_type = NULL_TREE;
3137 vectype_in = NULL_TREE;
3138 nargs = gimple_call_num_args (stmt);
3140 /* Bail out if the function has more than four arguments; we do not have
3141 interesting builtin functions to vectorize with more than two arguments,
3142 except for fma. Calls with no arguments are not handled either. */
3143 if (nargs == 0 || nargs > 4)
3144 return false;
3146 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3147 combined_fn cfn = gimple_call_combined_fn (stmt);
3148 if (cfn == CFN_GOMP_SIMD_LANE)
3150 nargs = 0;
3151 rhs_type = unsigned_type_node;
3154 int mask_opno = -1;
3155 if (internal_fn_p (cfn))
3156 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3158 for (i = 0; i < nargs; i++)
3160 if ((int) i == mask_opno)
3162 op = gimple_call_arg (stmt, i);
3163 if (!vect_check_scalar_mask (vinfo,
3164 stmt_info, op, &dt[i], &vectypes[i]))
3165 return false;
3166 continue;
3169 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3170 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3172 if (dump_enabled_p ())
3173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3174 "use not simple.\n");
3175 return false;
3178 /* We can only handle calls with arguments of the same type. */
3179 if (rhs_type
3180 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3182 if (dump_enabled_p ())
3183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3184 "argument types differ.\n");
3185 return false;
3187 if (!rhs_type)
3188 rhs_type = TREE_TYPE (op);
3190 if (!vectype_in)
3191 vectype_in = vectypes[i];
3192 else if (vectypes[i]
3193 && !types_compatible_p (vectypes[i], vectype_in))
3195 if (dump_enabled_p ())
3196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3197 "argument vector types differ.\n");
3198 return false;
3201 /* If all arguments are external or constant defs, infer the vector type
3202 from the scalar type. */
3203 if (!vectype_in)
3204 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3205 if (vec_stmt)
3206 gcc_assert (vectype_in);
3207 if (!vectype_in)
3209 if (dump_enabled_p ())
3210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3211 "no vectype for scalar type %T\n", rhs_type);
3213 return false;
3215 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3216 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3217 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3218 by a pack of the two vectors into an SI vector. We would need
3219 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3220 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3222 if (dump_enabled_p ())
3223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3224 "mismatched vector sizes %T and %T\n",
3225 vectype_in, vectype_out);
3226 return false;
3229 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3230 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3232 if (dump_enabled_p ())
3233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3234 "mixed mask and nonmask vector types\n");
3235 return false;
3238 /* FORNOW */
3239 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3240 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3241 if (known_eq (nunits_in * 2, nunits_out))
3242 modifier = NARROW;
3243 else if (known_eq (nunits_out, nunits_in))
3244 modifier = NONE;
3245 else if (known_eq (nunits_out * 2, nunits_in))
3246 modifier = WIDEN;
3247 else
3248 return false;
3250 /* We only handle functions that do not read or clobber memory. */
3251 if (gimple_vuse (stmt))
3253 if (dump_enabled_p ())
3254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3255 "function reads from or writes to memory.\n");
3256 return false;
3259 /* For now, we only vectorize functions if a target specific builtin
3260 is available. TODO -- in some cases, it might be profitable to
3261 insert the calls for pieces of the vector, in order to be able
3262 to vectorize other operations in the loop. */
3263 fndecl = NULL_TREE;
3264 internal_fn ifn = IFN_LAST;
3265 tree callee = gimple_call_fndecl (stmt);
3267 /* First try using an internal function. */
3268 tree_code convert_code = ERROR_MARK;
3269 if (cfn != CFN_LAST
3270 && (modifier == NONE
3271 || (modifier == NARROW
3272 && simple_integer_narrowing (vectype_out, vectype_in,
3273 &convert_code))))
3274 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3275 vectype_in);
3277 /* If that fails, try asking for a target-specific built-in function. */
3278 if (ifn == IFN_LAST)
3280 if (cfn != CFN_LAST)
3281 fndecl = targetm.vectorize.builtin_vectorized_function
3282 (cfn, vectype_out, vectype_in);
3283 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3284 fndecl = targetm.vectorize.builtin_md_vectorized_function
3285 (callee, vectype_out, vectype_in);
3288 if (ifn == IFN_LAST && !fndecl)
3290 if (cfn == CFN_GOMP_SIMD_LANE
3291 && !slp_node
3292 && loop_vinfo
3293 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3294 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3295 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3296 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3298 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3299 { 0, 1, 2, ... vf - 1 } vector. */
3300 gcc_assert (nargs == 0);
3302 else if (modifier == NONE
3303 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3304 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3305 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3306 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3307 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3308 slp_op, vectype_in, cost_vec);
3309 else
3311 if (dump_enabled_p ())
3312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3313 "function is not vectorizable.\n");
3314 return false;
3318 if (slp_node)
3319 ncopies = 1;
3320 else if (modifier == NARROW && ifn == IFN_LAST)
3321 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3322 else
3323 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3325 /* Sanity check: make sure that at least one copy of the vectorized stmt
3326 needs to be generated. */
3327 gcc_assert (ncopies >= 1);
3329 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3330 if (!vec_stmt) /* transformation not required. */
3332 if (slp_node)
3333 for (i = 0; i < nargs; ++i)
3334 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3336 if (dump_enabled_p ())
3337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3338 "incompatible vector types for invariants\n");
3339 return false;
3341 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3342 DUMP_VECT_SCOPE ("vectorizable_call");
3343 vect_model_simple_cost (vinfo, stmt_info,
3344 ncopies, dt, ndts, slp_node, cost_vec);
3345 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3346 record_stmt_cost (cost_vec, ncopies / 2,
3347 vec_promote_demote, stmt_info, 0, vect_body);
3349 if (loop_vinfo && mask_opno >= 0)
3351 unsigned int nvectors = (slp_node
3352 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3353 : ncopies);
3354 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3355 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3356 vectype_out, scalar_mask);
3358 return true;
3361 /* Transform. */
3363 if (dump_enabled_p ())
3364 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3366 /* Handle def. */
3367 scalar_dest = gimple_call_lhs (stmt);
3368 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3370 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3372 if (modifier == NONE || ifn != IFN_LAST)
3374 tree prev_res = NULL_TREE;
3375 vargs.safe_grow (nargs, true);
3376 orig_vargs.safe_grow (nargs, true);
3377 auto_vec<vec<tree> > vec_defs (nargs);
3378 for (j = 0; j < ncopies; ++j)
3380 /* Build argument list for the vectorized call. */
3381 if (slp_node)
3383 vec<tree> vec_oprnds0;
3385 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3386 vec_oprnds0 = vec_defs[0];
3388 /* Arguments are ready. Create the new vector stmt. */
3389 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3391 size_t k;
3392 for (k = 0; k < nargs; k++)
3394 vec<tree> vec_oprndsk = vec_defs[k];
3395 vargs[k] = vec_oprndsk[i];
3397 gimple *new_stmt;
3398 if (modifier == NARROW)
3400 /* We don't define any narrowing conditional functions
3401 at present. */
3402 gcc_assert (mask_opno < 0);
3403 tree half_res = make_ssa_name (vectype_in);
3404 gcall *call
3405 = gimple_build_call_internal_vec (ifn, vargs);
3406 gimple_call_set_lhs (call, half_res);
3407 gimple_call_set_nothrow (call, true);
3408 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3409 if ((i & 1) == 0)
3411 prev_res = half_res;
3412 continue;
3414 new_temp = make_ssa_name (vec_dest);
3415 new_stmt = gimple_build_assign (new_temp, convert_code,
3416 prev_res, half_res);
3417 vect_finish_stmt_generation (vinfo, stmt_info,
3418 new_stmt, gsi);
3420 else
3422 if (mask_opno >= 0 && masked_loop_p)
3424 unsigned int vec_num = vec_oprnds0.length ();
3425 /* Always true for SLP. */
3426 gcc_assert (ncopies == 1);
3427 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3428 vectype_out, i);
3429 vargs[mask_opno] = prepare_load_store_mask
3430 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3433 gcall *call;
3434 if (ifn != IFN_LAST)
3435 call = gimple_build_call_internal_vec (ifn, vargs);
3436 else
3437 call = gimple_build_call_vec (fndecl, vargs);
3438 new_temp = make_ssa_name (vec_dest, call);
3439 gimple_call_set_lhs (call, new_temp);
3440 gimple_call_set_nothrow (call, true);
3441 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3442 new_stmt = call;
3444 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3446 continue;
3449 for (i = 0; i < nargs; i++)
3451 op = gimple_call_arg (stmt, i);
3452 if (j == 0)
3454 vec_defs.quick_push (vNULL);
3455 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3456 op, &vec_defs[i]);
3458 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3461 if (mask_opno >= 0 && masked_loop_p)
3463 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3464 vectype_out, j);
3465 vargs[mask_opno]
3466 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3467 vargs[mask_opno], gsi);
3470 gimple *new_stmt;
3471 if (cfn == CFN_GOMP_SIMD_LANE)
3473 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3474 tree new_var
3475 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3476 gimple *init_stmt = gimple_build_assign (new_var, cst);
3477 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3478 new_temp = make_ssa_name (vec_dest);
3479 new_stmt = gimple_build_assign (new_temp, new_var);
3480 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3482 else if (modifier == NARROW)
3484 /* We don't define any narrowing conditional functions at
3485 present. */
3486 gcc_assert (mask_opno < 0);
3487 tree half_res = make_ssa_name (vectype_in);
3488 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3489 gimple_call_set_lhs (call, half_res);
3490 gimple_call_set_nothrow (call, true);
3491 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3492 if ((j & 1) == 0)
3494 prev_res = half_res;
3495 continue;
3497 new_temp = make_ssa_name (vec_dest);
3498 new_stmt = gimple_build_assign (new_temp, convert_code,
3499 prev_res, half_res);
3500 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3502 else
3504 gcall *call;
3505 if (ifn != IFN_LAST)
3506 call = gimple_build_call_internal_vec (ifn, vargs);
3507 else
3508 call = gimple_build_call_vec (fndecl, vargs);
3509 new_temp = make_ssa_name (vec_dest, call);
3510 gimple_call_set_lhs (call, new_temp);
3511 gimple_call_set_nothrow (call, true);
3512 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3513 new_stmt = call;
3516 if (j == (modifier == NARROW ? 1 : 0))
3517 *vec_stmt = new_stmt;
3518 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3520 for (i = 0; i < nargs; i++)
3522 vec<tree> vec_oprndsi = vec_defs[i];
3523 vec_oprndsi.release ();
3526 else if (modifier == NARROW)
3528 auto_vec<vec<tree> > vec_defs (nargs);
3529 /* We don't define any narrowing conditional functions at present. */
3530 gcc_assert (mask_opno < 0);
3531 for (j = 0; j < ncopies; ++j)
3533 /* Build argument list for the vectorized call. */
3534 if (j == 0)
3535 vargs.create (nargs * 2);
3536 else
3537 vargs.truncate (0);
3539 if (slp_node)
3541 vec<tree> vec_oprnds0;
3543 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3544 vec_oprnds0 = vec_defs[0];
3546 /* Arguments are ready. Create the new vector stmt. */
3547 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3549 size_t k;
3550 vargs.truncate (0);
3551 for (k = 0; k < nargs; k++)
3553 vec<tree> vec_oprndsk = vec_defs[k];
3554 vargs.quick_push (vec_oprndsk[i]);
3555 vargs.quick_push (vec_oprndsk[i + 1]);
3557 gcall *call;
3558 if (ifn != IFN_LAST)
3559 call = gimple_build_call_internal_vec (ifn, vargs);
3560 else
3561 call = gimple_build_call_vec (fndecl, vargs);
3562 new_temp = make_ssa_name (vec_dest, call);
3563 gimple_call_set_lhs (call, new_temp);
3564 gimple_call_set_nothrow (call, true);
3565 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3566 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3568 continue;
3571 for (i = 0; i < nargs; i++)
3573 op = gimple_call_arg (stmt, i);
3574 if (j == 0)
3576 vec_defs.quick_push (vNULL);
3577 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3578 op, &vec_defs[i], vectypes[i]);
3580 vec_oprnd0 = vec_defs[i][2*j];
3581 vec_oprnd1 = vec_defs[i][2*j+1];
3583 vargs.quick_push (vec_oprnd0);
3584 vargs.quick_push (vec_oprnd1);
3587 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3588 new_temp = make_ssa_name (vec_dest, new_stmt);
3589 gimple_call_set_lhs (new_stmt, new_temp);
3590 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3592 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3595 if (!slp_node)
3596 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3598 for (i = 0; i < nargs; i++)
3600 vec<tree> vec_oprndsi = vec_defs[i];
3601 vec_oprndsi.release ();
3604 else
3605 /* No current target implements this case. */
3606 return false;
3608 vargs.release ();
3610 /* The call in STMT might prevent it from being removed in dce.
3611 We cannot remove it here, however, because of the way the SSA name
3612 it defines is mapped to the new definition. So just replace the
3613 rhs of the statement with something harmless. */
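/* A sketch of what this means in the IL: a scalar call such as
   x_1 = sqrtf (a_2);
   keeps its LHS, but once the vector statements have been emitted its
   RHS is replaced below by a zero constant, i.e. x_1 = 0.0, so that DCE
   can remove it if no scalar uses of x_1 remain.  */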
3615 if (slp_node)
3616 return true;
3618 stmt_info = vect_orig_stmt (stmt_info);
3619 lhs = gimple_get_lhs (stmt_info->stmt);
3621 gassign *new_stmt
3622 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3623 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3625 return true;
3629 struct simd_call_arg_info
3631 tree vectype;
3632 tree op;
3633 HOST_WIDE_INT linear_step;
3634 enum vect_def_type dt;
3635 unsigned int align;
3636 bool simd_lane_linear;
3639 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3640 is linear within a simd lane (but not within the whole loop), note it in
3641 *ARGINFO. */
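/* A sketch of the kind of IL this recognizes (names are illustrative):
   _1 = .GOMP_SIMD_LANE (simduid.0_5(D));
   _2 = _1 * 4;
   op_3 = &array + _2;
   Here OP (op_3) is linear within the simd lane, with base &array and
   linear step 4.  */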
3643 static void
3644 vect_simd_lane_linear (tree op, class loop *loop,
3645 struct simd_call_arg_info *arginfo)
3647 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3649 if (!is_gimple_assign (def_stmt)
3650 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3651 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3652 return;
3654 tree base = gimple_assign_rhs1 (def_stmt);
3655 HOST_WIDE_INT linear_step = 0;
3656 tree v = gimple_assign_rhs2 (def_stmt);
3657 while (TREE_CODE (v) == SSA_NAME)
3659 tree t;
3660 def_stmt = SSA_NAME_DEF_STMT (v);
3661 if (is_gimple_assign (def_stmt))
3662 switch (gimple_assign_rhs_code (def_stmt))
3664 case PLUS_EXPR:
3665 t = gimple_assign_rhs2 (def_stmt);
3666 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3667 return;
3668 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3669 v = gimple_assign_rhs1 (def_stmt);
3670 continue;
3671 case MULT_EXPR:
3672 t = gimple_assign_rhs2 (def_stmt);
3673 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3674 return;
3675 linear_step = tree_to_shwi (t);
3676 v = gimple_assign_rhs1 (def_stmt);
3677 continue;
3678 CASE_CONVERT:
3679 t = gimple_assign_rhs1 (def_stmt);
3680 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3681 || (TYPE_PRECISION (TREE_TYPE (v))
3682 < TYPE_PRECISION (TREE_TYPE (t))))
3683 return;
3684 if (!linear_step)
3685 linear_step = 1;
3686 v = t;
3687 continue;
3688 default:
3689 return;
3691 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3692 && loop->simduid
3693 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3694 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3695 == loop->simduid))
3697 if (!linear_step)
3698 linear_step = 1;
3699 arginfo->linear_step = linear_step;
3700 arginfo->op = base;
3701 arginfo->simd_lane_linear = true;
3702 return;
3707 /* Return the number of elements in vector type VECTYPE, which is associated
3708 with a SIMD clone. At present these vectors always have a constant
3709 length. */
3711 static unsigned HOST_WIDE_INT
3712 simd_clone_subparts (tree vectype)
3714 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3717 /* Function vectorizable_simd_clone_call.
3719 Check if STMT_INFO performs a function call that can be vectorized
3720 by calling a simd clone of the function.
3721 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3722 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3723 Return true if STMT_INFO is vectorizable in this way. */
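/* For example (an illustrative sketch, independent of any particular
   vector ABI): with a vectorization factor of 4 and a usable 4-lane
   clone of FOO, the scalar call
   x_1 = foo (a_2);
   in the loop body is replaced by a single call to the clone that takes
   a vector of four A values and returns a vector of four results, which
   then stands in for the four scalar X values.  */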
3725 static bool
3726 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3727 gimple_stmt_iterator *gsi,
3728 gimple **vec_stmt, slp_tree slp_node,
3729 stmt_vector_for_cost *)
3731 tree vec_dest;
3732 tree scalar_dest;
3733 tree op, type;
3734 tree vec_oprnd0 = NULL_TREE;
3735 tree vectype;
3736 unsigned int nunits;
3737 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3738 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3739 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3740 tree fndecl, new_temp;
3741 int ncopies, j;
3742 auto_vec<simd_call_arg_info> arginfo;
3743 vec<tree> vargs = vNULL;
3744 size_t i, nargs;
3745 tree lhs, rtype, ratype;
3746 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3748 /* Is STMT a vectorizable call? */
3749 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3750 if (!stmt)
3751 return false;
3753 fndecl = gimple_call_fndecl (stmt);
3754 if (fndecl == NULL_TREE)
3755 return false;
3757 struct cgraph_node *node = cgraph_node::get (fndecl);
3758 if (node == NULL || node->simd_clones == NULL)
3759 return false;
3761 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3762 return false;
3764 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3765 && ! vec_stmt)
3766 return false;
3768 if (gimple_call_lhs (stmt)
3769 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3770 return false;
3772 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3774 vectype = STMT_VINFO_VECTYPE (stmt_info);
3776 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3777 return false;
3779 /* FORNOW */
3780 if (slp_node)
3781 return false;
3783 /* Process function arguments. */
3784 nargs = gimple_call_num_args (stmt);
3786 /* Bail out if the function has zero arguments. */
3787 if (nargs == 0)
3788 return false;
3790 arginfo.reserve (nargs, true);
3792 for (i = 0; i < nargs; i++)
3794 simd_call_arg_info thisarginfo;
3795 affine_iv iv;
3797 thisarginfo.linear_step = 0;
3798 thisarginfo.align = 0;
3799 thisarginfo.op = NULL_TREE;
3800 thisarginfo.simd_lane_linear = false;
3802 op = gimple_call_arg (stmt, i);
3803 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3804 &thisarginfo.vectype)
3805 || thisarginfo.dt == vect_uninitialized_def)
3807 if (dump_enabled_p ())
3808 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3809 "use not simple.\n");
3810 return false;
3813 if (thisarginfo.dt == vect_constant_def
3814 || thisarginfo.dt == vect_external_def)
3815 gcc_assert (thisarginfo.vectype == NULL_TREE);
3816 else
3818 gcc_assert (thisarginfo.vectype != NULL_TREE);
3819 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3821 if (dump_enabled_p ())
3822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3823 "vector mask arguments are not supported\n");
3824 return false;
3828 /* For linear arguments, the analysis phase should have saved
3829 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
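/* A sketch of the layout assumed here, matching what the analysis
   path below pushes: entry 0 holds the selected clone's decl, and for
   argument I entries I*3+1, I*3+2 and I*3+3 hold the linear base, the
   linear step and the simd-lane-linear flag respectively.  */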
3830 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3831 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3833 gcc_assert (vec_stmt);
3834 thisarginfo.linear_step
3835 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3836 thisarginfo.op
3837 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3838 thisarginfo.simd_lane_linear
3839 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3840 == boolean_true_node);
3841 /* If the loop has been peeled for alignment, we need to adjust it. */
3842 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3843 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3844 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3846 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3847 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3848 tree opt = TREE_TYPE (thisarginfo.op);
3849 bias = fold_convert (TREE_TYPE (step), bias);
3850 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3851 thisarginfo.op
3852 = fold_build2 (POINTER_TYPE_P (opt)
3853 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3854 thisarginfo.op, bias);
3857 else if (!vec_stmt
3858 && thisarginfo.dt != vect_constant_def
3859 && thisarginfo.dt != vect_external_def
3860 && loop_vinfo
3861 && TREE_CODE (op) == SSA_NAME
3862 && simple_iv (loop, loop_containing_stmt (stmt), op,
3863 &iv, false)
3864 && tree_fits_shwi_p (iv.step))
3866 thisarginfo.linear_step = tree_to_shwi (iv.step);
3867 thisarginfo.op = iv.base;
3869 else if ((thisarginfo.dt == vect_constant_def
3870 || thisarginfo.dt == vect_external_def)
3871 && POINTER_TYPE_P (TREE_TYPE (op)))
3872 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3873 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3874 linear too. */
3875 if (POINTER_TYPE_P (TREE_TYPE (op))
3876 && !thisarginfo.linear_step
3877 && !vec_stmt
3878 && thisarginfo.dt != vect_constant_def
3879 && thisarginfo.dt != vect_external_def
3880 && loop_vinfo
3881 && !slp_node
3882 && TREE_CODE (op) == SSA_NAME)
3883 vect_simd_lane_linear (op, loop, &thisarginfo);
3885 arginfo.quick_push (thisarginfo);
3888 unsigned HOST_WIDE_INT vf;
3889 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3891 if (dump_enabled_p ())
3892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3893 "not considering SIMD clones; not yet supported"
3894 " for variable-width vectors.\n");
3895 return false;
3898 unsigned int badness = 0;
3899 struct cgraph_node *bestn = NULL;
3900 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3901 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3902 else
3903 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3904 n = n->simdclone->next_clone)
3906 unsigned int this_badness = 0;
3907 if (n->simdclone->simdlen > vf
3908 || n->simdclone->nargs != nargs)
3909 continue;
3910 if (n->simdclone->simdlen < vf)
3911 this_badness += (exact_log2 (vf)
3912 - exact_log2 (n->simdclone->simdlen)) * 1024;
3913 if (n->simdclone->inbranch)
3914 this_badness += 2048;
3915 int target_badness = targetm.simd_clone.usable (n);
3916 if (target_badness < 0)
3917 continue;
3918 this_badness += target_badness * 512;
3919 /* FORNOW: Have to add code to add the mask argument. */
3920 if (n->simdclone->inbranch)
3921 continue;
3922 for (i = 0; i < nargs; i++)
3924 switch (n->simdclone->args[i].arg_type)
3926 case SIMD_CLONE_ARG_TYPE_VECTOR:
3927 if (!useless_type_conversion_p
3928 (n->simdclone->args[i].orig_type,
3929 TREE_TYPE (gimple_call_arg (stmt, i))))
3930 i = -1;
3931 else if (arginfo[i].dt == vect_constant_def
3932 || arginfo[i].dt == vect_external_def
3933 || arginfo[i].linear_step)
3934 this_badness += 64;
3935 break;
3936 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3937 if (arginfo[i].dt != vect_constant_def
3938 && arginfo[i].dt != vect_external_def)
3939 i = -1;
3940 break;
3941 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3942 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3943 if (arginfo[i].dt == vect_constant_def
3944 || arginfo[i].dt == vect_external_def
3945 || (arginfo[i].linear_step
3946 != n->simdclone->args[i].linear_step))
3947 i = -1;
3948 break;
3949 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3950 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3951 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3952 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3953 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3954 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3955 /* FORNOW */
3956 i = -1;
3957 break;
3958 case SIMD_CLONE_ARG_TYPE_MASK:
3959 gcc_unreachable ();
3961 if (i == (size_t) -1)
3962 break;
3963 if (n->simdclone->args[i].alignment > arginfo[i].align)
3965 i = -1;
3966 break;
3968 if (arginfo[i].align)
3969 this_badness += (exact_log2 (arginfo[i].align)
3970 - exact_log2 (n->simdclone->args[i].alignment));
3972 if (i == (size_t) -1)
3973 continue;
3974 if (bestn == NULL || this_badness < badness)
3976 bestn = n;
3977 badness = this_badness;
3981 if (bestn == NULL)
3982 return false;
3984 for (i = 0; i < nargs; i++)
3985 if ((arginfo[i].dt == vect_constant_def
3986 || arginfo[i].dt == vect_external_def)
3987 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3989 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
3990 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
3991 slp_node);
3992 if (arginfo[i].vectype == NULL
3993 || (simd_clone_subparts (arginfo[i].vectype)
3994 > bestn->simdclone->simdlen))
3995 return false;
3998 fndecl = bestn->decl;
3999 nunits = bestn->simdclone->simdlen;
4000 ncopies = vf / nunits;
4002 /* If the function isn't const, only allow it in simd loops where the
4003 user has asserted that at least nunits consecutive iterations can be
4004 performed using SIMD instructions. */
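/* E.g. (a sketch) a loop annotated with '#pragma omp simd safelen(8)'
   records 8 in loop->safelen, which is what permits using a non-const
   clone with nunits <= 8 here even though the call has a VUSE.  */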
4005 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4006 && gimple_vuse (stmt))
4007 return false;
4009 /* Sanity check: make sure that at least one copy of the vectorized stmt
4010 needs to be generated. */
4011 gcc_assert (ncopies >= 1);
4013 if (!vec_stmt) /* transformation not required. */
4015 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4016 for (i = 0; i < nargs; i++)
4017 if ((bestn->simdclone->args[i].arg_type
4018 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4019 || (bestn->simdclone->args[i].arg_type
4020 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4022 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4023 + 1,
4024 true);
4025 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4026 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4027 ? size_type_node : TREE_TYPE (arginfo[i].op);
4028 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4029 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4030 tree sll = arginfo[i].simd_lane_linear
4031 ? boolean_true_node : boolean_false_node;
4032 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4034 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4035 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4036 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4037 dt, slp_node, cost_vec); */
4038 return true;
4041 /* Transform. */
4043 if (dump_enabled_p ())
4044 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4046 /* Handle def. */
4047 scalar_dest = gimple_call_lhs (stmt);
4048 vec_dest = NULL_TREE;
4049 rtype = NULL_TREE;
4050 ratype = NULL_TREE;
4051 if (scalar_dest)
4053 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4054 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4055 if (TREE_CODE (rtype) == ARRAY_TYPE)
4057 ratype = rtype;
4058 rtype = TREE_TYPE (ratype);
4062 auto_vec<vec<tree> > vec_oprnds;
4063 auto_vec<unsigned> vec_oprnds_i;
4064 vec_oprnds.safe_grow_cleared (nargs, true);
4065 vec_oprnds_i.safe_grow_cleared (nargs, true);
4066 for (j = 0; j < ncopies; ++j)
4068 /* Build argument list for the vectorized call. */
4069 if (j == 0)
4070 vargs.create (nargs);
4071 else
4072 vargs.truncate (0);
4074 for (i = 0; i < nargs; i++)
4076 unsigned int k, l, m, o;
4077 tree atype;
4078 op = gimple_call_arg (stmt, i);
4079 switch (bestn->simdclone->args[i].arg_type)
4081 case SIMD_CLONE_ARG_TYPE_VECTOR:
4082 atype = bestn->simdclone->args[i].vector_type;
4083 o = nunits / simd_clone_subparts (atype);
4084 for (m = j * o; m < (j + 1) * o; m++)
4086 if (simd_clone_subparts (atype)
4087 < simd_clone_subparts (arginfo[i].vectype))
4089 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4090 k = (simd_clone_subparts (arginfo[i].vectype)
4091 / simd_clone_subparts (atype));
4092 gcc_assert ((k & (k - 1)) == 0);
4093 if (m == 0)
4095 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4096 ncopies * o / k, op,
4097 &vec_oprnds[i]);
4098 vec_oprnds_i[i] = 0;
4099 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4101 else
4103 vec_oprnd0 = arginfo[i].op;
4104 if ((m & (k - 1)) == 0)
4105 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4107 arginfo[i].op = vec_oprnd0;
4108 vec_oprnd0
4109 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4110 bitsize_int (prec),
4111 bitsize_int ((m & (k - 1)) * prec));
4112 gassign *new_stmt
4113 = gimple_build_assign (make_ssa_name (atype),
4114 vec_oprnd0);
4115 vect_finish_stmt_generation (vinfo, stmt_info,
4116 new_stmt, gsi);
4117 vargs.safe_push (gimple_assign_lhs (new_stmt));
4119 else
4121 k = (simd_clone_subparts (atype)
4122 / simd_clone_subparts (arginfo[i].vectype));
4123 gcc_assert ((k & (k - 1)) == 0);
4124 vec<constructor_elt, va_gc> *ctor_elts;
4125 if (k != 1)
4126 vec_alloc (ctor_elts, k);
4127 else
4128 ctor_elts = NULL;
4129 for (l = 0; l < k; l++)
4131 if (m == 0 && l == 0)
4133 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4134 k * o * ncopies,
4136 &vec_oprnds[i]);
4137 vec_oprnds_i[i] = 0;
4138 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4140 else
4141 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4142 arginfo[i].op = vec_oprnd0;
4143 if (k == 1)
4144 break;
4145 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4146 vec_oprnd0);
4148 if (k == 1)
4149 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4150 atype))
4152 vec_oprnd0
4153 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4154 gassign *new_stmt
4155 = gimple_build_assign (make_ssa_name (atype),
4156 vec_oprnd0);
4157 vect_finish_stmt_generation (vinfo, stmt_info,
4158 new_stmt, gsi);
4159 vargs.safe_push (gimple_assign_lhs (new_stmt));
4161 else
4162 vargs.safe_push (vec_oprnd0);
4163 else
4165 vec_oprnd0 = build_constructor (atype, ctor_elts);
4166 gassign *new_stmt
4167 = gimple_build_assign (make_ssa_name (atype),
4168 vec_oprnd0);
4169 vect_finish_stmt_generation (vinfo, stmt_info,
4170 new_stmt, gsi);
4171 vargs.safe_push (gimple_assign_lhs (new_stmt));
4175 break;
4176 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4177 vargs.safe_push (op);
4178 break;
4179 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4180 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4181 if (j == 0)
4183 gimple_seq stmts;
4184 arginfo[i].op
4185 = force_gimple_operand (unshare_expr (arginfo[i].op),
4186 &stmts, true, NULL_TREE);
4187 if (stmts != NULL)
4189 basic_block new_bb;
4190 edge pe = loop_preheader_edge (loop);
4191 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4192 gcc_assert (!new_bb);
4194 if (arginfo[i].simd_lane_linear)
4196 vargs.safe_push (arginfo[i].op);
4197 break;
4199 tree phi_res = copy_ssa_name (op);
4200 gphi *new_phi = create_phi_node (phi_res, loop->header);
4201 add_phi_arg (new_phi, arginfo[i].op,
4202 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4203 enum tree_code code
4204 = POINTER_TYPE_P (TREE_TYPE (op))
4205 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4206 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4207 ? sizetype : TREE_TYPE (op);
4208 widest_int cst
4209 = wi::mul (bestn->simdclone->args[i].linear_step,
4210 ncopies * nunits);
4211 tree tcst = wide_int_to_tree (type, cst);
4212 tree phi_arg = copy_ssa_name (op);
4213 gassign *new_stmt
4214 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4215 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4216 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4217 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4218 UNKNOWN_LOCATION);
4219 arginfo[i].op = phi_res;
4220 vargs.safe_push (phi_res);
4222 else
4224 enum tree_code code
4225 = POINTER_TYPE_P (TREE_TYPE (op))
4226 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4227 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4228 ? sizetype : TREE_TYPE (op);
4229 widest_int cst
4230 = wi::mul (bestn->simdclone->args[i].linear_step,
4231 j * nunits);
4232 tree tcst = wide_int_to_tree (type, cst);
4233 new_temp = make_ssa_name (TREE_TYPE (op));
4234 gassign *new_stmt
4235 = gimple_build_assign (new_temp, code,
4236 arginfo[i].op, tcst);
4237 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4238 vargs.safe_push (new_temp);
4240 break;
4241 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4242 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4243 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4244 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4245 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4246 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4247 default:
4248 gcc_unreachable ();
4252 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4253 if (vec_dest)
4255 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4256 if (ratype)
4257 new_temp = create_tmp_var (ratype);
4258 else if (useless_type_conversion_p (vectype, rtype))
4259 new_temp = make_ssa_name (vec_dest, new_call);
4260 else
4261 new_temp = make_ssa_name (rtype, new_call);
4262 gimple_call_set_lhs (new_call, new_temp);
4264 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4265 gimple *new_stmt = new_call;
4267 if (vec_dest)
4269 if (simd_clone_subparts (vectype) < nunits)
4271 unsigned int k, l;
4272 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4273 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4274 k = nunits / simd_clone_subparts (vectype);
4275 gcc_assert ((k & (k - 1)) == 0);
4276 for (l = 0; l < k; l++)
4278 tree t;
4279 if (ratype)
4281 t = build_fold_addr_expr (new_temp);
4282 t = build2 (MEM_REF, vectype, t,
4283 build_int_cst (TREE_TYPE (t), l * bytes));
4285 else
4286 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4287 bitsize_int (prec), bitsize_int (l * prec));
4288 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4289 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4291 if (j == 0 && l == 0)
4292 *vec_stmt = new_stmt;
4293 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4296 if (ratype)
4297 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4298 continue;
4300 else if (simd_clone_subparts (vectype) > nunits)
4302 unsigned int k = (simd_clone_subparts (vectype)
4303 / simd_clone_subparts (rtype));
4304 gcc_assert ((k & (k - 1)) == 0);
4305 if ((j & (k - 1)) == 0)
4306 vec_alloc (ret_ctor_elts, k);
4307 if (ratype)
4309 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4310 for (m = 0; m < o; m++)
4312 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4313 size_int (m), NULL_TREE, NULL_TREE);
4314 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4315 tem);
4316 vect_finish_stmt_generation (vinfo, stmt_info,
4317 new_stmt, gsi);
4318 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4319 gimple_assign_lhs (new_stmt));
4321 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4323 else
4324 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4325 if ((j & (k - 1)) != k - 1)
4326 continue;
4327 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4328 new_stmt
4329 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4330 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4332 if ((unsigned) j == k - 1)
4333 *vec_stmt = new_stmt;
4334 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4335 continue;
4337 else if (ratype)
4339 tree t = build_fold_addr_expr (new_temp);
4340 t = build2 (MEM_REF, vectype, t,
4341 build_int_cst (TREE_TYPE (t), 0));
4342 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4343 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4344 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4346 else if (!useless_type_conversion_p (vectype, rtype))
4348 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4349 new_stmt
4350 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4351 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4355 if (j == 0)
4356 *vec_stmt = new_stmt;
4357 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4360 for (i = 0; i < nargs; ++i)
4362 vec<tree> oprndsi = vec_oprnds[i];
4363 oprndsi.release ();
4365 vargs.release ();
4367 /* The call in STMT might prevent it from being removed in dce.
4368 We cannot remove it here, however, because of the way the SSA name
4369 it defines is mapped to the new definition. So just replace the
4370 rhs of the statement with something harmless. */
4372 if (slp_node)
4373 return true;
4375 gimple *new_stmt;
4376 if (scalar_dest)
4378 type = TREE_TYPE (scalar_dest);
4379 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4380 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4382 else
4383 new_stmt = gimple_build_nop ();
4384 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4385 unlink_stmt_vdef (stmt);
4387 return true;
4391 /* Function vect_gen_widened_results_half
4393 Create a vector stmt whose code, operand type, and result variable
4394 are CODE, OP_TYPE, and VEC_DEST, and whose operands are VEC_OPRND0
4395 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4396 The stmt computes one half of the widened result; the caller is
4397 expected to generate the other half with a second call.
4398 STMT_INFO is the original scalar stmt that we are vectorizing. */
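/* For instance (a sketch), widening a V8HI operand to V8SI uses two of
   these calls, one with VEC_UNPACK_LO_EXPR and one with
   VEC_UNPACK_HI_EXPR, each producing a V4SI vector holding half of the
   widened elements.  */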
4400 static gimple *
4401 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4402 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4403 tree vec_dest, gimple_stmt_iterator *gsi,
4404 stmt_vec_info stmt_info)
4406 gimple *new_stmt;
4407 tree new_temp;
4409 /* Generate half of the widened result: */
4410 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4411 if (op_type != binary_op)
4412 vec_oprnd1 = NULL;
4413 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4414 new_temp = make_ssa_name (vec_dest, new_stmt);
4415 gimple_assign_set_lhs (new_stmt, new_temp);
4416 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4418 return new_stmt;
4422 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4423 For multi-step conversions store the resulting vectors and call the function
4424 recursively. */
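/* For instance (a sketch), demoting two V4SI operands to V8HI emits
   dest_1 = VEC_PACK_TRUNC_EXPR <vop0, vop1>;
   and for a two-step V4SI -> V16QI demotion the intermediate V8HI
   results are stored back into VEC_OPRNDS and packed once more by the
   recursive call.  */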
4426 static void
4427 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4428 int multi_step_cvt,
4429 stmt_vec_info stmt_info,
4430 vec<tree> vec_dsts,
4431 gimple_stmt_iterator *gsi,
4432 slp_tree slp_node, enum tree_code code)
4434 unsigned int i;
4435 tree vop0, vop1, new_tmp, vec_dest;
4437 vec_dest = vec_dsts.pop ();
4439 for (i = 0; i < vec_oprnds->length (); i += 2)
4441 /* Create demotion operation. */
4442 vop0 = (*vec_oprnds)[i];
4443 vop1 = (*vec_oprnds)[i + 1];
4444 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4445 new_tmp = make_ssa_name (vec_dest, new_stmt);
4446 gimple_assign_set_lhs (new_stmt, new_tmp);
4447 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4449 if (multi_step_cvt)
4450 /* Store the resulting vector for next recursive call. */
4451 (*vec_oprnds)[i/2] = new_tmp;
4452 else
4454 /* This is the last step of the conversion sequence. Store the
4455 vectors in SLP_NODE or in the vector info of the scalar statement
4456 (or in the STMT_VINFO_RELATED_STMT chain). */
4457 if (slp_node)
4458 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4459 else
4460 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4464 /* For multi-step demotion operations we first generate demotion operations
4465 from the source type to the intermediate types, and then combine the
4466 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4467 type. */
4468 if (multi_step_cvt)
4470 /* At each level of recursion we have half of the operands we had at the
4471 previous level. */
4472 vec_oprnds->truncate ((i+1)/2);
4473 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4474 multi_step_cvt - 1,
4475 stmt_info, vec_dsts, gsi,
4476 slp_node, VEC_PACK_TRUNC_EXPR);
4479 vec_dsts.quick_push (vec_dest);
4483 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4484 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4485 STMT_INFO. For multi-step conversions store the resulting vectors and
4486 call the function recursively. */
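/* For instance (a sketch), promoting V8HI operands of a widening
   multiplication with VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR
   turns each input pair into two V4SI results; the widened vectors
   replace the inputs in *VEC_OPRNDS0 so a following conversion step can
   consume them.  */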
4488 static void
4489 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4490 vec<tree> *vec_oprnds0,
4491 vec<tree> *vec_oprnds1,
4492 stmt_vec_info stmt_info, tree vec_dest,
4493 gimple_stmt_iterator *gsi,
4494 enum tree_code code1,
4495 enum tree_code code2, int op_type)
4497 int i;
4498 tree vop0, vop1, new_tmp1, new_tmp2;
4499 gimple *new_stmt1, *new_stmt2;
4500 vec<tree> vec_tmp = vNULL;
4502 vec_tmp.create (vec_oprnds0->length () * 2);
4503 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4505 if (op_type == binary_op)
4506 vop1 = (*vec_oprnds1)[i];
4507 else
4508 vop1 = NULL_TREE;
4510 /* Generate the two halves of the promotion operation. */
4511 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4512 op_type, vec_dest, gsi,
4513 stmt_info);
4514 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4515 op_type, vec_dest, gsi,
4516 stmt_info);
4517 if (is_gimple_call (new_stmt1))
4519 new_tmp1 = gimple_call_lhs (new_stmt1);
4520 new_tmp2 = gimple_call_lhs (new_stmt2);
4522 else
4524 new_tmp1 = gimple_assign_lhs (new_stmt1);
4525 new_tmp2 = gimple_assign_lhs (new_stmt2);
4528 /* Store the results for the next step. */
4529 vec_tmp.quick_push (new_tmp1);
4530 vec_tmp.quick_push (new_tmp2);
4533 vec_oprnds0->release ();
4534 *vec_oprnds0 = vec_tmp;
4538 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4539 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4540 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4541 Return true if STMT_INFO is vectorizable in this way. */
4543 static bool
4544 vectorizable_conversion (vec_info *vinfo,
4545 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4546 gimple **vec_stmt, slp_tree slp_node,
4547 stmt_vector_for_cost *cost_vec)
4549 tree vec_dest;
4550 tree scalar_dest;
4551 tree op0, op1 = NULL_TREE;
4552 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4553 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4554 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4555 tree new_temp;
4556 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4557 int ndts = 2;
4558 poly_uint64 nunits_in;
4559 poly_uint64 nunits_out;
4560 tree vectype_out, vectype_in;
4561 int ncopies, i;
4562 tree lhs_type, rhs_type;
4563 enum { NARROW, NONE, WIDEN } modifier;
4564 vec<tree> vec_oprnds0 = vNULL;
4565 vec<tree> vec_oprnds1 = vNULL;
4566 tree vop0;
4567 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4568 int multi_step_cvt = 0;
4569 vec<tree> interm_types = vNULL;
4570 tree intermediate_type, cvt_type = NULL_TREE;
4571 int op_type;
4572 unsigned short fltsz;
4574 /* Is STMT a vectorizable conversion? */
4576 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4577 return false;
4579 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4580 && ! vec_stmt)
4581 return false;
4583 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4584 if (!stmt)
4585 return false;
4587 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4588 return false;
4590 code = gimple_assign_rhs_code (stmt);
4591 if (!CONVERT_EXPR_CODE_P (code)
4592 && code != FIX_TRUNC_EXPR
4593 && code != FLOAT_EXPR
4594 && code != WIDEN_MULT_EXPR
4595 && code != WIDEN_LSHIFT_EXPR)
4596 return false;
4598 op_type = TREE_CODE_LENGTH (code);
4600 /* Check types of lhs and rhs. */
4601 scalar_dest = gimple_assign_lhs (stmt);
4602 lhs_type = TREE_TYPE (scalar_dest);
4603 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4605 /* Check the operands of the operation. */
4606 slp_tree slp_op0, slp_op1 = NULL;
4607 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4608 0, &op0, &slp_op0, &dt[0], &vectype_in))
4610 if (dump_enabled_p ())
4611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4612 "use not simple.\n");
4613 return false;
4616 rhs_type = TREE_TYPE (op0);
4617 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4618 && !((INTEGRAL_TYPE_P (lhs_type)
4619 && INTEGRAL_TYPE_P (rhs_type))
4620 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4621 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4622 return false;
4624 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4625 && ((INTEGRAL_TYPE_P (lhs_type)
4626 && !type_has_mode_precision_p (lhs_type))
4627 || (INTEGRAL_TYPE_P (rhs_type)
4628 && !type_has_mode_precision_p (rhs_type))))
4630 if (dump_enabled_p ())
4631 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4632 "type conversion to/from bit-precision unsupported."
4633 "\n");
4634 return false;
4637 if (op_type == binary_op)
4639 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4641 op1 = gimple_assign_rhs2 (stmt);
4642 tree vectype1_in;
4643 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4644 &op1, &slp_op1, &dt[1], &vectype1_in))
4646 if (dump_enabled_p ())
4647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4648 "use not simple.\n");
4649 return false;
4651 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4652 OP1. */
4653 if (!vectype_in)
4654 vectype_in = vectype1_in;
4657 /* If op0 is an external or constant def, infer the vector type
4658 from the scalar type. */
4659 if (!vectype_in)
4660 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4661 if (vec_stmt)
4662 gcc_assert (vectype_in);
4663 if (!vectype_in)
4665 if (dump_enabled_p ())
4666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4667 "no vectype for scalar type %T\n", rhs_type);
4669 return false;
4672 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4673 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4675 if (dump_enabled_p ())
4676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4677 "can't convert between boolean and non "
4678 "boolean vectors %T\n", rhs_type);
4680 return false;
4683 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4684 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4685 if (known_eq (nunits_out, nunits_in))
4686 modifier = NONE;
4687 else if (multiple_p (nunits_out, nunits_in))
4688 modifier = NARROW;
4689 else
4691 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4692 modifier = WIDEN;
4695 /* Multiple types in SLP are handled by creating the appropriate number of
4696 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4697 case of SLP. */
4698 if (slp_node)
4699 ncopies = 1;
4700 else if (modifier == NARROW)
4701 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4702 else
4703 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4705 /* Sanity check: make sure that at least one copy of the vectorized stmt
4706 needs to be generated. */
4707 gcc_assert (ncopies >= 1);
4709 bool found_mode = false;
4710 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4711 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4712 opt_scalar_mode rhs_mode_iter;
4714 /* Supportable by target? */
4715 switch (modifier)
4717 case NONE:
4718 if (code != FIX_TRUNC_EXPR
4719 && code != FLOAT_EXPR
4720 && !CONVERT_EXPR_CODE_P (code))
4721 return false;
4722 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4723 break;
4724 /* FALLTHRU */
4725 unsupported:
4726 if (dump_enabled_p ())
4727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4728 "conversion not supported by target.\n");
4729 return false;
4731 case WIDEN:
4732 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4733 vectype_in, &code1, &code2,
4734 &multi_step_cvt, &interm_types))
4736 /* A binary widening operation can only be supported directly by the
4737 architecture. */
4738 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4739 break;
4742 if (code != FLOAT_EXPR
4743 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4744 goto unsupported;
4746 fltsz = GET_MODE_SIZE (lhs_mode);
4747 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4749 rhs_mode = rhs_mode_iter.require ();
4750 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4751 break;
4753 cvt_type
4754 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4755 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4756 if (cvt_type == NULL_TREE)
4757 goto unsupported;
4759 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4761 if (!supportable_convert_operation (code, vectype_out,
4762 cvt_type, &codecvt1))
4763 goto unsupported;
4765 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4766 vectype_out, cvt_type,
4767 &codecvt1, &codecvt2,
4768 &multi_step_cvt,
4769 &interm_types))
4770 continue;
4771 else
4772 gcc_assert (multi_step_cvt == 0);
4774 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4775 cvt_type,
4776 vectype_in, &code1, &code2,
4777 &multi_step_cvt, &interm_types))
4779 found_mode = true;
4780 break;
4784 if (!found_mode)
4785 goto unsupported;
4787 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4788 codecvt2 = ERROR_MARK;
4789 else
4791 multi_step_cvt++;
4792 interm_types.safe_push (cvt_type);
4793 cvt_type = NULL_TREE;
4795 break;
4797 case NARROW:
4798 gcc_assert (op_type == unary_op);
4799 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4800 &code1, &multi_step_cvt,
4801 &interm_types))
4802 break;
4804 if (code != FIX_TRUNC_EXPR
4805 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4806 goto unsupported;
4808 cvt_type
4809 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4810 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4811 if (cvt_type == NULL_TREE)
4812 goto unsupported;
4813 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4814 &codecvt1))
4815 goto unsupported;
4816 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4817 &code1, &multi_step_cvt,
4818 &interm_types))
4819 break;
4820 goto unsupported;
4822 default:
4823 gcc_unreachable ();
4826 if (!vec_stmt) /* transformation not required. */
4828 if (slp_node
4829 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4830 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4832 if (dump_enabled_p ())
4833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4834 "incompatible vector types for invariants\n");
4835 return false;
4837 DUMP_VECT_SCOPE ("vectorizable_conversion");
4838 if (modifier == NONE)
4840 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4841 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4842 cost_vec);
4844 else if (modifier == NARROW)
4846 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4847 /* The final packing step produces one vector result per copy. */
4848 unsigned int nvectors
4849 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4850 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4851 multi_step_cvt, cost_vec);
4853 else
4855 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4856 /* The initial unpacking step produces two vector results
4857 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4858 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4859 unsigned int nvectors
4860 = (slp_node
4861 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4862 : ncopies * 2);
4863 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4864 multi_step_cvt, cost_vec);
4866 interm_types.release ();
4867 return true;
4870 /* Transform. */
4871 if (dump_enabled_p ())
4872 dump_printf_loc (MSG_NOTE, vect_location,
4873 "transform conversion. ncopies = %d.\n", ncopies);
4875 if (op_type == binary_op)
4877 if (CONSTANT_CLASS_P (op0))
4878 op0 = fold_convert (TREE_TYPE (op1), op0);
4879 else if (CONSTANT_CLASS_P (op1))
4880 op1 = fold_convert (TREE_TYPE (op0), op1);
4883 /* In case of multi-step conversion, we first generate conversion operations
4884 to the intermediate types, and then from those types to the final one.
4885 We create vector destinations for the intermediate types (TYPES) received
4886 from supportable_*_operation, and store them in the correct order
4887 for future use in vect_create_vectorized_*_stmts (). */
4888 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4889 vec_dest = vect_create_destination_var (scalar_dest,
4890 (cvt_type && modifier == WIDEN)
4891 ? cvt_type : vectype_out);
4892 vec_dsts.quick_push (vec_dest);
4894 if (multi_step_cvt)
4896 for (i = interm_types.length () - 1;
4897 interm_types.iterate (i, &intermediate_type); i--)
4899 vec_dest = vect_create_destination_var (scalar_dest,
4900 intermediate_type);
4901 vec_dsts.quick_push (vec_dest);
4905 if (cvt_type)
4906 vec_dest = vect_create_destination_var (scalar_dest,
4907 modifier == WIDEN
4908 ? vectype_out : cvt_type);
4910 int ninputs = 1;
4911 if (!slp_node)
4913 if (modifier == WIDEN)
4915 else if (modifier == NARROW)
4917 if (multi_step_cvt)
4918 ninputs = vect_pow2 (multi_step_cvt);
4919 ninputs *= 2;
4923 switch (modifier)
4925 case NONE:
4926 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
4927 op0, &vec_oprnds0);
4928 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4930 /* Arguments are ready. Create the new vector stmt. */
4931 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4932 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4933 new_temp = make_ssa_name (vec_dest, new_stmt);
4934 gimple_assign_set_lhs (new_stmt, new_temp);
4935 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4937 if (slp_node)
4938 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4939 else
4940 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4942 break;
4944 case WIDEN:
4945 /* In case the vectorization factor (VF) is bigger than the number
4946 of elements that we can fit in a vectype (nunits), we have to
4947 generate more than one vector stmt, i.e., we need to "unroll"
4948 the vector stmt by a factor of VF/nunits. */
4949 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4950 op0, &vec_oprnds0,
4951 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
4952 &vec_oprnds1);
4953 if (code == WIDEN_LSHIFT_EXPR)
4955 vec_oprnds1.create (ncopies * ninputs);
4956 for (i = 0; i < ncopies * ninputs; ++i)
4957 vec_oprnds1.quick_push (op1);
4959 /* Arguments are ready. Create the new vector stmts. */
4960 for (i = multi_step_cvt; i >= 0; i--)
4962 tree this_dest = vec_dsts[i];
4963 enum tree_code c1 = code1, c2 = code2;
4964 if (i == 0 && codecvt2 != ERROR_MARK)
4966 c1 = codecvt1;
4967 c2 = codecvt2;
4969 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
4970 &vec_oprnds1, stmt_info,
4971 this_dest, gsi,
4972 c1, c2, op_type);
4975 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4977 gimple *new_stmt;
4978 if (cvt_type)
4980 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4981 new_temp = make_ssa_name (vec_dest);
4982 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
4983 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4985 else
4986 new_stmt = SSA_NAME_DEF_STMT (vop0);
4988 if (slp_node)
4989 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4990 else
4991 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4993 break;
4995 case NARROW:
4996 /* In case the vectorization factor (VF) is bigger than the number
4997 of elements that we can fit in a vectype (nunits), we have to
4998 generate more than one vector stmt, i.e., we need to "unroll"
4999 the vector stmt by a factor of VF/nunits. */
5000 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5001 op0, &vec_oprnds0);
5002 /* Arguments are ready. Create the new vector stmts. */
5003 if (cvt_type)
5004 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5006 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5007 new_temp = make_ssa_name (vec_dest);
5008 gassign *new_stmt
5009 = gimple_build_assign (new_temp, codecvt1, vop0);
5010 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5011 vec_oprnds0[i] = new_temp;
5014 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5015 multi_step_cvt,
5016 stmt_info, vec_dsts, gsi,
5017 slp_node, code1);
5018 break;
5020 if (!slp_node)
5021 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5023 vec_oprnds0.release ();
5024 vec_oprnds1.release ();
5025 interm_types.release ();
5027 return true;
5030 /* Return true if we can assume from the scalar form of STMT_INFO that
5031 neither the scalar nor the vector forms will generate code. STMT_INFO
5032 is known not to involve a data reference. */
5034 bool
5035 vect_nop_conversion_p (stmt_vec_info stmt_info)
5037 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5038 if (!stmt)
5039 return false;
5041 tree lhs = gimple_assign_lhs (stmt);
5042 tree_code code = gimple_assign_rhs_code (stmt);
5043 tree rhs = gimple_assign_rhs1 (stmt);
5045 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5046 return true;
5048 if (CONVERT_EXPR_CODE_P (code))
5049 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5051 return false;
5054 /* Function vectorizable_assignment.
5056 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5057 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5058 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5059 Return true if STMT_INFO is vectorizable in this way. */
5061 static bool
5062 vectorizable_assignment (vec_info *vinfo,
5063 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5064 gimple **vec_stmt, slp_tree slp_node,
5065 stmt_vector_for_cost *cost_vec)
5067 tree vec_dest;
5068 tree scalar_dest;
5069 tree op;
5070 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5071 tree new_temp;
5072 enum vect_def_type dt[1] = {vect_unknown_def_type};
5073 int ndts = 1;
5074 int ncopies;
5075 int i;
5076 vec<tree> vec_oprnds = vNULL;
5077 tree vop;
5078 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5079 enum tree_code code;
5080 tree vectype_in;
5082 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5083 return false;
5085 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5086 && ! vec_stmt)
5087 return false;
5089 /* Is STMT a vectorizable assignment? */
5090 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5091 if (!stmt)
5092 return false;
5094 scalar_dest = gimple_assign_lhs (stmt);
5095 if (TREE_CODE (scalar_dest) != SSA_NAME)
5096 return false;
5098 if (STMT_VINFO_DATA_REF (stmt_info))
5099 return false;
5101 code = gimple_assign_rhs_code (stmt);
5102 if (!(gimple_assign_single_p (stmt)
5103 || code == PAREN_EXPR
5104 || CONVERT_EXPR_CODE_P (code)))
5105 return false;
5107 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5108 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5110 /* Multiple types in SLP are handled by creating the appropriate number of
5111 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5112 case of SLP. */
5113 if (slp_node)
5114 ncopies = 1;
5115 else
5116 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5118 gcc_assert (ncopies >= 1);
5120 slp_tree slp_op;
5121 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5122 &dt[0], &vectype_in))
5124 if (dump_enabled_p ())
5125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5126 "use not simple.\n");
5127 return false;
5129 if (!vectype_in)
5130 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5132 /* We can handle NOP_EXPR conversions that do not change the number
5133 of elements or the vector size. */
5134 if ((CONVERT_EXPR_CODE_P (code)
5135 || code == VIEW_CONVERT_EXPR)
5136 && (!vectype_in
5137 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5138 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5139 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5140 return false;
5142 /* We do not handle bit-precision changes. */
5143 if ((CONVERT_EXPR_CODE_P (code)
5144 || code == VIEW_CONVERT_EXPR)
5145 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5146 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5147 || !type_has_mode_precision_p (TREE_TYPE (op)))
5148 /* But a conversion that does not change the bit-pattern is ok. */
5149 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5150 > TYPE_PRECISION (TREE_TYPE (op)))
5151 && TYPE_UNSIGNED (TREE_TYPE (op)))
5152 /* Conversion between boolean types of different sizes is
5153 a simple assignment in case their vectypes are the same
5154 boolean vectors. */
5155 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5156 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5158 if (dump_enabled_p ())
5159 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5160 "type conversion to/from bit-precision "
5161 "unsupported.\n");
5162 return false;
5165 if (!vec_stmt) /* transformation not required. */
5167 if (slp_node
5168 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5170 if (dump_enabled_p ())
5171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5172 "incompatible vector types for invariants\n");
5173 return false;
5175 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5176 DUMP_VECT_SCOPE ("vectorizable_assignment");
5177 if (!vect_nop_conversion_p (stmt_info))
5178 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5179 cost_vec);
5180 return true;
5183 /* Transform. */
5184 if (dump_enabled_p ())
5185 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5187 /* Handle def. */
5188 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5190 /* Handle use. */
5191 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5193 /* Arguments are ready. Create the new vector stmt. */
5194 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5196 if (CONVERT_EXPR_CODE_P (code)
5197 || code == VIEW_CONVERT_EXPR)
5198 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5199 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5200 new_temp = make_ssa_name (vec_dest, new_stmt);
5201 gimple_assign_set_lhs (new_stmt, new_temp);
5202 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5203 if (slp_node)
5204 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5205 else
5206 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5208 if (!slp_node)
5209 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5211 vec_oprnds.release ();
5212 return true;
5216 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE,
5217 either as a shift by a scalar or by a vector. */
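/* Usage sketch (hypothetical caller): a pattern that wants to rewrite a
   division by a power of two as a shift might first check
   if (!vect_supportable_shift (vinfo, RSHIFT_EXPR, itype))
     return NULL;
   before committing to the replacement.  */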
5219 bool
5220 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5223 machine_mode vec_mode;
5224 optab optab;
5225 int icode;
5226 tree vectype;
5228 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5229 if (!vectype)
5230 return false;
5232 optab = optab_for_tree_code (code, vectype, optab_scalar);
5233 if (!optab
5234 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5236 optab = optab_for_tree_code (code, vectype, optab_vector);
5237 if (!optab
5238 || (optab_handler (optab, TYPE_MODE (vectype))
5239 == CODE_FOR_nothing))
5240 return false;
5243 vec_mode = TYPE_MODE (vectype);
5244 icode = (int) optab_handler (optab, vec_mode);
5245 if (icode == CODE_FOR_nothing)
5246 return false;
5248 return true;
5252 /* Function vectorizable_shift.
5254 Check if STMT_INFO performs a shift operation that can be vectorized.
5255 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5256 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5257 Return true if STMT_INFO is vectorizable in this way. */
5259 static bool
5260 vectorizable_shift (vec_info *vinfo,
5261 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5262 gimple **vec_stmt, slp_tree slp_node,
5263 stmt_vector_for_cost *cost_vec)
5265 tree vec_dest;
5266 tree scalar_dest;
5267 tree op0, op1 = NULL;
5268 tree vec_oprnd1 = NULL_TREE;
5269 tree vectype;
5270 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5271 enum tree_code code;
5272 machine_mode vec_mode;
5273 tree new_temp;
5274 optab optab;
5275 int icode;
5276 machine_mode optab_op2_mode;
5277 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5278 int ndts = 2;
5279 poly_uint64 nunits_in;
5280 poly_uint64 nunits_out;
5281 tree vectype_out;
5282 tree op1_vectype;
5283 int ncopies;
5284 int i;
5285 vec<tree> vec_oprnds0 = vNULL;
5286 vec<tree> vec_oprnds1 = vNULL;
5287 tree vop0, vop1;
5288 unsigned int k;
5289 bool scalar_shift_arg = true;
5290 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5291 bool incompatible_op1_vectype_p = false;
5293 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5294 return false;
5296 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5297 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5298 && ! vec_stmt)
5299 return false;
5301 /* Is STMT a vectorizable binary/unary operation? */
5302 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5303 if (!stmt)
5304 return false;
5306 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5307 return false;
5309 code = gimple_assign_rhs_code (stmt);
5311 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5312 || code == RROTATE_EXPR))
5313 return false;
5315 scalar_dest = gimple_assign_lhs (stmt);
5316 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5317 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5319 if (dump_enabled_p ())
5320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5321 "bit-precision shifts not supported.\n");
5322 return false;
5325 slp_tree slp_op0;
5326 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5327 0, &op0, &slp_op0, &dt[0], &vectype))
5329 if (dump_enabled_p ())
5330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5331 "use not simple.\n");
5332 return false;
5334 /* If op0 is an external or constant def, infer the vector type
5335 from the scalar type. */
5336 if (!vectype)
5337 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5338 if (vec_stmt)
5339 gcc_assert (vectype);
5340 if (!vectype)
5342 if (dump_enabled_p ())
5343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5344 "no vectype for scalar type\n");
5345 return false;
5348 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5349 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5350 if (maybe_ne (nunits_out, nunits_in))
5351 return false;
5353 stmt_vec_info op1_def_stmt_info;
5354 slp_tree slp_op1;
5355 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5356 &dt[1], &op1_vectype, &op1_def_stmt_info))
5358 if (dump_enabled_p ())
5359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5360 "use not simple.\n");
5361 return false;
5364 /* Multiple types in SLP are handled by creating the appropriate number of
5365 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5366 case of SLP. */
5367 if (slp_node)
5368 ncopies = 1;
5369 else
5370 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5372 gcc_assert (ncopies >= 1);
5374 /* Determine whether the shift amount is a vector or a scalar. If the
5375 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5377 if ((dt[1] == vect_internal_def
5378 || dt[1] == vect_induction_def
5379 || dt[1] == vect_nested_cycle)
5380 && !slp_node)
5381 scalar_shift_arg = false;
5382 else if (dt[1] == vect_constant_def
5383 || dt[1] == vect_external_def
5384 || dt[1] == vect_internal_def)
5386 /* In SLP, we need to check whether the shift count is the same in
5387 all statements; in loops, if it is a constant or invariant, it is
5388 always a scalar shift. */
5389 if (slp_node)
5391 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5392 stmt_vec_info slpstmt_info;
5394 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5396 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5397 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5398 scalar_shift_arg = false;
5401 /* For internal SLP defs we have to make sure we see scalar stmts
5402 for all vector elements.
5403 ??? For different vectors we could resort to a different
5404 scalar shift operand but code-generation below simply always
5405 takes the first. */
5406 if (dt[1] == vect_internal_def
5407 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5408 stmts.length ()))
5409 scalar_shift_arg = false;
5412 /* If the shift amount is computed by a pattern stmt we cannot
5413 use the scalar amount directly, so give up and use a vector
5414 shift. */
5415 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5416 scalar_shift_arg = false;
5418 else
5420 if (dump_enabled_p ())
5421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5422 "operand mode requires invariant argument.\n");
5423 return false;
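/* Illustrative examples of the classification above (hypothetical scalar
   code, non-SLP loop case):
     a[i] = b[i] << 3;      shift amount is vect_constant_def -> scalar arg
     a[i] = b[i] << n;      shift amount is vect_external_def -> scalar arg
     a[i] = b[i] << c[i];   shift amount is vect_internal_def -> vector arg  */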
5426 /* Vector shifted by vector. */
5427 bool was_scalar_shift_arg = scalar_shift_arg;
5428 if (!scalar_shift_arg)
5430 optab = optab_for_tree_code (code, vectype, optab_vector);
5431 if (dump_enabled_p ())
5432 dump_printf_loc (MSG_NOTE, vect_location,
5433 "vector/vector shift/rotate found.\n");
5435 if (!op1_vectype)
5436 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5437 slp_op1);
5438 incompatible_op1_vectype_p
5439 = (op1_vectype == NULL_TREE
5440 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5441 TYPE_VECTOR_SUBPARTS (vectype))
5442 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5443 if (incompatible_op1_vectype_p
5444 && (!slp_node
5445 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5446 || slp_op1->refcnt != 1))
5448 if (dump_enabled_p ())
5449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5450 "unusable type for last operand in"
5451 " vector/vector shift/rotate.\n");
5452 return false;
5455 /* See if the machine has a vector-shift-by-scalar insn and, if not,
5456 then see if it has a vector-shift-by-vector insn. */
5457 else
5459 optab = optab_for_tree_code (code, vectype, optab_scalar);
5460 if (optab
5461 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5463 if (dump_enabled_p ())
5464 dump_printf_loc (MSG_NOTE, vect_location,
5465 "vector/scalar shift/rotate found.\n");
5467 else
5469 optab = optab_for_tree_code (code, vectype, optab_vector);
5470 if (optab
5471 && (optab_handler (optab, TYPE_MODE (vectype))
5472 != CODE_FOR_nothing))
5474 scalar_shift_arg = false;
5476 if (dump_enabled_p ())
5477 dump_printf_loc (MSG_NOTE, vect_location,
5478 "vector/vector shift/rotate found.\n");
5480 if (!op1_vectype)
5481 op1_vectype = get_vectype_for_scalar_type (vinfo,
5482 TREE_TYPE (op1),
5483 slp_op1);
5485 /* Unlike the other binary operators, shifts/rotates have
5486 the rhs being int, instead of the same type as the lhs,
5487 so make sure the scalar is the right type if we are
5488 dealing with vectors of long long/long/short/char. */
5489 incompatible_op1_vectype_p
5490 = (!op1_vectype
5491 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5492 TREE_TYPE (op1)));
5493 if (incompatible_op1_vectype_p
5494 && dt[1] == vect_internal_def)
5496 if (dump_enabled_p ())
5497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5498 "unusable type for last operand in"
5499 " vector/vector shift/rotate.\n");
5500 return false;
5506 /* Supportable by target? */
5507 if (!optab)
5509 if (dump_enabled_p ())
5510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5511 "no optab.\n");
5512 return false;
5514 vec_mode = TYPE_MODE (vectype);
5515 icode = (int) optab_handler (optab, vec_mode);
5516 if (icode == CODE_FOR_nothing)
5518 if (dump_enabled_p ())
5519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5520 "op not supported by target.\n");
5521 /* Check only during analysis. */
5522 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5523 || (!vec_stmt
5524 && !vect_worthwhile_without_simd_p (vinfo, code)))
5525 return false;
5526 if (dump_enabled_p ())
5527 dump_printf_loc (MSG_NOTE, vect_location,
5528 "proceeding using word mode.\n");
5531 /* Worthwhile without SIMD support? Check only during analysis. */
5532 if (!vec_stmt
5533 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5534 && !vect_worthwhile_without_simd_p (vinfo, code))
5536 if (dump_enabled_p ())
5537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5538 "not worthwhile without SIMD support.\n");
5539 return false;
5542 if (!vec_stmt) /* transformation not required. */
5544 if (slp_node
5545 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5546 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5547 && (!incompatible_op1_vectype_p
5548 || dt[1] == vect_constant_def)
5549 && !vect_maybe_update_slp_op_vectype
5550 (slp_op1,
5551 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5553 if (dump_enabled_p ())
5554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5555 "incompatible vector types for invariants\n");
5556 return false;
5558 /* Now adjust the constant shift amount in place. */
5559 if (slp_node
5560 && incompatible_op1_vectype_p
5561 && dt[1] == vect_constant_def)
5563 for (unsigned i = 0;
5564 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5566 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5567 = fold_convert (TREE_TYPE (vectype),
5568 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5569 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5570 == INTEGER_CST));
5573 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5574 DUMP_VECT_SCOPE ("vectorizable_shift");
5575 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5576 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5577 return true;
5580 /* Transform. */
5582 if (dump_enabled_p ())
5583 dump_printf_loc (MSG_NOTE, vect_location,
5584 "transform binary/unary operation.\n");
5586 if (incompatible_op1_vectype_p && !slp_node)
5588 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5589 op1 = fold_convert (TREE_TYPE (vectype), op1);
5590 if (dt[1] != vect_constant_def)
5591 op1 = vect_init_vector (vinfo, stmt_info, op1,
5592 TREE_TYPE (vectype), NULL);
5595 /* Handle def. */
5596 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5598 if (scalar_shift_arg && dt[1] != vect_internal_def)
5600 /* Vector shl and shr insn patterns can be defined with scalar
5601 operand 2 (shift operand). In this case, use constant or loop
5602 invariant op1 directly, without extending it to vector mode
5603 first. */
5604 optab_op2_mode = insn_data[icode].operand[2].mode;
5605 if (!VECTOR_MODE_P (optab_op2_mode))
5607 if (dump_enabled_p ())
5608 dump_printf_loc (MSG_NOTE, vect_location,
5609 "operand 1 using scalar mode.\n");
5610 vec_oprnd1 = op1;
5611 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5612 vec_oprnds1.quick_push (vec_oprnd1);
5613 /* Store vec_oprnd1 for every vector stmt to be created.
5614 We check during the analysis that all the shift arguments
5615 are the same.
5616 TODO: Allow different constants for different vector
5617 stmts generated for an SLP instance. */
5618 for (k = 0;
5619 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5620 vec_oprnds1.quick_push (vec_oprnd1);
5623 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5625 if (was_scalar_shift_arg)
5627 /* If the argument was the same in all lanes create
5628 the correctly typed vector shift amount directly. */
5629 op1 = fold_convert (TREE_TYPE (vectype), op1);
5630 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5631 !loop_vinfo ? gsi : NULL);
5632 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5633 !loop_vinfo ? gsi : NULL);
5634 vec_oprnds1.create (slp_node->vec_stmts_size);
5635 for (k = 0; k < slp_node->vec_stmts_size; k++)
5636 vec_oprnds1.quick_push (vec_oprnd1);
5638 else if (dt[1] == vect_constant_def)
5639 /* The constant shift amount has been adjusted in place. */
5641 else
5642 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5645 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5646 (a special case for certain kinds of vector shifts); otherwise,
5647 operand 1 should be of a vector type (the usual case). */
5648 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5649 op0, &vec_oprnds0,
5650 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5652 /* Arguments are ready. Create the new vector stmt. */
5653 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5655 /* For internal defs where we need to use a scalar shift arg,
5656 extract the first lane. */
5657 if (scalar_shift_arg && dt[1] == vect_internal_def)
5659 vop1 = vec_oprnds1[0];
5660 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5661 gassign *new_stmt
5662 = gimple_build_assign (new_temp,
5663 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5664 vop1,
5665 TYPE_SIZE (TREE_TYPE (new_temp)),
5666 bitsize_zero_node));
5667 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5668 vop1 = new_temp;
5670 else
5671 vop1 = vec_oprnds1[i];
5672 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5673 new_temp = make_ssa_name (vec_dest, new_stmt);
5674 gimple_assign_set_lhs (new_stmt, new_temp);
5675 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5676 if (slp_node)
5677 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5678 else
5679 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5682 if (!slp_node)
5683 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5685 vec_oprnds0.release ();
5686 vec_oprnds1.release ();
5688 return true;
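/* Rough sketch of the generated code (hypothetical GIMPLE, for
   illustration only):
     scalar stmt:           _1 = x_2 << amt_3;
     vector/scalar shift:   vect__1 = vect_x << amt_3;
     vector/vector shift:   vect__1 = vect_x << vect_amt;
   When a scalar amount is required but the amount is defined inside the
   loop, lane 0 of its vectorized definition is extracted with a
   BIT_FIELD_REF, as done in the transform loop above.  */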
5692 /* Function vectorizable_operation.
5694 Check if STMT_INFO performs a binary, unary or ternary operation that can
5695 be vectorized.
5696 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5697 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5698 Return true if STMT_INFO is vectorizable in this way. */
5700 static bool
5701 vectorizable_operation (vec_info *vinfo,
5702 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5703 gimple **vec_stmt, slp_tree slp_node,
5704 stmt_vector_for_cost *cost_vec)
5706 tree vec_dest;
5707 tree scalar_dest;
5708 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5709 tree vectype;
5710 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5711 enum tree_code code, orig_code;
5712 machine_mode vec_mode;
5713 tree new_temp;
5714 int op_type;
5715 optab optab;
5716 bool target_support_p;
5717 enum vect_def_type dt[3]
5718 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5719 int ndts = 3;
5720 poly_uint64 nunits_in;
5721 poly_uint64 nunits_out;
5722 tree vectype_out;
5723 int ncopies, vec_num;
5724 int i;
5725 vec<tree> vec_oprnds0 = vNULL;
5726 vec<tree> vec_oprnds1 = vNULL;
5727 vec<tree> vec_oprnds2 = vNULL;
5728 tree vop0, vop1, vop2;
5729 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5731 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5732 return false;
5734 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5735 && ! vec_stmt)
5736 return false;
5738 /* Is STMT a vectorizable binary/unary operation? */
5739 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5740 if (!stmt)
5741 return false;
5743 /* Loads and stores are handled in vectorizable_{load,store}. */
5744 if (STMT_VINFO_DATA_REF (stmt_info))
5745 return false;
5747 orig_code = code = gimple_assign_rhs_code (stmt);
5749 /* Shifts are handled in vectorizable_shift. */
5750 if (code == LSHIFT_EXPR
5751 || code == RSHIFT_EXPR
5752 || code == LROTATE_EXPR
5753 || code == RROTATE_EXPR)
5754 return false;
5756 /* Comparisons are handled in vectorizable_comparison. */
5757 if (TREE_CODE_CLASS (code) == tcc_comparison)
5758 return false;
5760 /* Conditions are handled in vectorizable_condition. */
5761 if (code == COND_EXPR)
5762 return false;
5764 /* For pointer addition and subtraction, we should use the normal
5765 plus and minus for the vector operation. */
5766 if (code == POINTER_PLUS_EXPR)
5767 code = PLUS_EXPR;
5768 if (code == POINTER_DIFF_EXPR)
5769 code = MINUS_EXPR;
5771 /* Support only unary, binary or ternary operations. */
5772 op_type = TREE_CODE_LENGTH (code);
5773 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5775 if (dump_enabled_p ())
5776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5777 "num. args = %d (not unary/binary/ternary op).\n",
5778 op_type);
5779 return false;
5782 scalar_dest = gimple_assign_lhs (stmt);
5783 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5785 /* Most operations cannot handle bit-precision types without extra
5786 truncations. */
5787 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5788 if (!mask_op_p
5789 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5790 /* Exception are bitwise binary operations. */
5791 && code != BIT_IOR_EXPR
5792 && code != BIT_XOR_EXPR
5793 && code != BIT_AND_EXPR)
5795 if (dump_enabled_p ())
5796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5797 "bit-precision arithmetic not supported.\n");
5798 return false;
5801 slp_tree slp_op0;
5802 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5803 0, &op0, &slp_op0, &dt[0], &vectype))
5805 if (dump_enabled_p ())
5806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5807 "use not simple.\n");
5808 return false;
5810 /* If op0 is an external or constant def, infer the vector type
5811 from the scalar type. */
5812 if (!vectype)
5814 /* For a boolean type we cannot determine the vectype from an
5815 invariant value (we don't know whether it is a vector
5816 of booleans or a vector of integers). We use the output
5817 vectype because operations on booleans don't change
5818 the type. */
5819 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5821 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5823 if (dump_enabled_p ())
5824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5825 "not supported operation on bool value.\n");
5826 return false;
5828 vectype = vectype_out;
5830 else
5831 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5832 slp_node);
5834 if (vec_stmt)
5835 gcc_assert (vectype);
5836 if (!vectype)
5838 if (dump_enabled_p ())
5839 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5840 "no vectype for scalar type %T\n",
5841 TREE_TYPE (op0));
5843 return false;
5846 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5847 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5848 if (maybe_ne (nunits_out, nunits_in))
5849 return false;
5851 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5852 slp_tree slp_op1 = NULL, slp_op2 = NULL;
5853 if (op_type == binary_op || op_type == ternary_op)
5855 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5856 1, &op1, &slp_op1, &dt[1], &vectype2))
5858 if (dump_enabled_p ())
5859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5860 "use not simple.\n");
5861 return false;
5864 if (op_type == ternary_op)
5866 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5867 2, &op2, &slp_op2, &dt[2], &vectype3))
5869 if (dump_enabled_p ())
5870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5871 "use not simple.\n");
5872 return false;
5876 /* Multiple types in SLP are handled by creating the appropriate number of
5877 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5878 case of SLP. */
5879 if (slp_node)
5881 ncopies = 1;
5882 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5884 else
5886 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5887 vec_num = 1;
5890 gcc_assert (ncopies >= 1);
5892 /* Reject attempts to combine mask types with nonmask types, e.g. if
5893 we have an AND between a (nonmask) boolean loaded from memory and
5894 a (mask) boolean result of a comparison.
5896 TODO: We could easily fix these cases up using pattern statements. */
5897 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
5898 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
5899 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
5901 if (dump_enabled_p ())
5902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5903 "mixed mask and nonmask vector types\n");
5904 return false;
5907 /* Supportable by target? */
5909 vec_mode = TYPE_MODE (vectype);
5910 if (code == MULT_HIGHPART_EXPR)
5911 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5912 else
5914 optab = optab_for_tree_code (code, vectype, optab_default);
5915 if (!optab)
5917 if (dump_enabled_p ())
5918 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5919 "no optab.\n");
5920 return false;
5922 target_support_p = (optab_handler (optab, vec_mode)
5923 != CODE_FOR_nothing);
5926 if (!target_support_p)
5928 if (dump_enabled_p ())
5929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5930 "op not supported by target.\n");
5931 /* Check only during analysis. */
5932 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5933 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5934 return false;
5935 if (dump_enabled_p ())
5936 dump_printf_loc (MSG_NOTE, vect_location,
5937 "proceeding using word mode.\n");
5940 /* Worthwhile without SIMD support? Check only during analysis. */
5941 if (!VECTOR_MODE_P (vec_mode)
5942 && !vec_stmt
5943 && !vect_worthwhile_without_simd_p (vinfo, code))
5945 if (dump_enabled_p ())
5946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5947 "not worthwhile without SIMD support.\n");
5948 return false;
5951 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
5952 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
5953 internal_fn cond_fn = get_conditional_internal_fn (code);
5955 if (!vec_stmt) /* transformation not required. */
5957 /* If this operation is part of a reduction, a fully-masked loop
5958 should only change the active lanes of the reduction chain,
5959 keeping the inactive lanes as-is. */
5960 if (loop_vinfo
5961 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5962 && reduc_idx >= 0)
5964 if (cond_fn == IFN_LAST
5965 || !direct_internal_fn_supported_p (cond_fn, vectype,
5966 OPTIMIZE_FOR_SPEED))
5968 if (dump_enabled_p ())
5969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5970 "can't use a fully-masked loop because no"
5971 " conditional operation is available.\n");
5972 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
5974 else
5975 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
5976 vectype, NULL);
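/* For example, a PLUS_EXPR feeding a reduction in a fully-masked loop is
   later emitted as
     _t = .COND_ADD (loop_mask, vop0, vop1, reduction_input);
   so inactive lanes simply pass the reduction input through unchanged
   (provided the target supports the conditional internal function).  */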
5979 /* Put types on constant and invariant SLP children. */
5980 if (slp_node
5981 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5982 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
5983 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
5985 if (dump_enabled_p ())
5986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5987 "incompatible vector types for invariants\n");
5988 return false;
5991 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5992 DUMP_VECT_SCOPE ("vectorizable_operation");
5993 vect_model_simple_cost (vinfo, stmt_info,
5994 ncopies, dt, ndts, slp_node, cost_vec);
5995 return true;
5998 /* Transform. */
6000 if (dump_enabled_p ())
6001 dump_printf_loc (MSG_NOTE, vect_location,
6002 "transform binary/unary operation.\n");
6004 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6006 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6007 vectors with unsigned elements, but the result is signed. So, we
6008 need to compute the MINUS_EXPR into a vectype temporary and
6009 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6010 tree vec_cvt_dest = NULL_TREE;
6011 if (orig_code == POINTER_DIFF_EXPR)
6013 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6014 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6016 /* Handle def. */
6017 else
6018 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6020 /* In case the vectorization factor (VF) is bigger than the number
6021 of elements that we can fit in a vectype (nunits), we have to generate
6022 more than one vector stmt - i.e - we need to "unroll" the
6023 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6024 from one copy of the vector stmt to the next, in the field
6025 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6026 stages to find the correct vector defs to be used when vectorizing
6027 stmts that use the defs of the current stmt. The example below
6028 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6029 we need to create 4 vectorized stmts):
6031 before vectorization:
6032 RELATED_STMT VEC_STMT
6033 S1: x = memref - -
6034 S2: z = x + 1 - -
6036 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6037 there):
6038 RELATED_STMT VEC_STMT
6039 VS1_0: vx0 = memref0 VS1_1 -
6040 VS1_1: vx1 = memref1 VS1_2 -
6041 VS1_2: vx2 = memref2 VS1_3 -
6042 VS1_3: vx3 = memref3 - -
6043 S1: x = load - VS1_0
6044 S2: z = x + 1 - -
6046 step2: vectorize stmt S2 (done here):
6047 To vectorize stmt S2 we first need to find the relevant vector
6048 def for the first operand 'x'. This is, as usual, obtained from
6049 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6050 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6051 relevant vector def 'vx0'. Having found 'vx0' we can generate
6052 the vector stmt VS2_0, and as usual, record it in the
6053 STMT_VINFO_VEC_STMT of stmt S2.
6054 When creating the second copy (VS2_1), we obtain the relevant vector
6055 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6056 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6057 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6058 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6059 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6060 chain of stmts and pointers:
6061 RELATED_STMT VEC_STMT
6062 VS1_0: vx0 = memref0 VS1_1 -
6063 VS1_1: vx1 = memref1 VS1_2 -
6064 VS1_2: vx2 = memref2 VS1_3 -
6065 VS1_3: vx3 = memref3 - -
6066 S1: x = load - VS1_0
6067 VS2_0: vz0 = vx0 + v1 VS2_1 -
6068 VS2_1: vz1 = vx1 + v1 VS2_2 -
6069 VS2_2: vz2 = vx2 + v1 VS2_3 -
6070 VS2_3: vz3 = vx3 + v1 - -
6071 S2: z = x + 1 - VS2_0 */
6073 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6074 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6075 /* Arguments are ready. Create the new vector stmt. */
6076 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6078 gimple *new_stmt = NULL;
6079 vop1 = ((op_type == binary_op || op_type == ternary_op)
6080 ? vec_oprnds1[i] : NULL_TREE);
6081 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6082 if (masked_loop_p && reduc_idx >= 0)
6084 /* Perform the operation on active elements only and take
6085 inactive elements from the reduction chain input. */
6086 gcc_assert (!vop2);
6087 vop2 = reduc_idx == 1 ? vop1 : vop0;
6088 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6089 vectype, i);
6090 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6091 vop0, vop1, vop2);
6092 new_temp = make_ssa_name (vec_dest, call);
6093 gimple_call_set_lhs (call, new_temp);
6094 gimple_call_set_nothrow (call, true);
6095 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6096 new_stmt = call;
6098 else
6100 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6101 new_temp = make_ssa_name (vec_dest, new_stmt);
6102 gimple_assign_set_lhs (new_stmt, new_temp);
6103 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6104 if (vec_cvt_dest)
6106 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6107 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6108 new_temp);
6109 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6110 gimple_assign_set_lhs (new_stmt, new_temp);
6111 vect_finish_stmt_generation (vinfo, stmt_info,
6112 new_stmt, gsi);
6115 if (slp_node)
6116 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6117 else
6118 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6121 if (!slp_node)
6122 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6124 vec_oprnds0.release ();
6125 vec_oprnds1.release ();
6126 vec_oprnds2.release ();
6128 return true;
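/* Illustration of the POINTER_DIFF_EXPR handling above (hypothetical
   GIMPLE): for  _1 = p_2 - q_3;  the subtraction is done on the unsigned
   pointer vectors and the result is then reinterpreted:
     vect_tmp = vect_p - vect_q;
     vect__1 = VIEW_CONVERT_EXPR <signed vectype_out> (vect_tmp);  */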
6131 /* A helper function to ensure data reference DR_INFO's base alignment. */
6133 static void
6134 ensure_base_align (dr_vec_info *dr_info)
6136 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6137 return;
6139 if (dr_info->base_misaligned)
6141 tree base_decl = dr_info->base_decl;
6143 // We should only be able to increase the alignment of a base object if
6144 // we know what its new alignment should be at compile time.
6145 unsigned HOST_WIDE_INT align_base_to =
6146 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6148 if (decl_in_symtab_p (base_decl))
6149 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6150 else if (DECL_ALIGN (base_decl) < align_base_to)
6152 SET_DECL_ALIGN (base_decl, align_base_to);
6153 DECL_USER_ALIGN (base_decl) = 1;
6155 dr_info->base_misaligned = false;
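/* Example: with a 16-byte DR_TARGET_ALIGNMENT and a 4-byte-aligned
   file-scope array as the base object, DECL_ALIGN is raised to 128 bits
   (through the symbol table when the decl is visible there) and
   DECL_USER_ALIGN is set so later passes do not lower the alignment
   again.  */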
6160 /* Function get_group_alias_ptr_type.
6162 Return the alias type for the group starting at FIRST_STMT_INFO. */
6164 static tree
6165 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6167 struct data_reference *first_dr, *next_dr;
6169 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6170 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6171 while (next_stmt_info)
6173 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6174 if (get_alias_set (DR_REF (first_dr))
6175 != get_alias_set (DR_REF (next_dr)))
6177 if (dump_enabled_p ())
6178 dump_printf_loc (MSG_NOTE, vect_location,
6179 "conflicting alias set types.\n");
6180 return ptr_type_node;
6182 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6184 return reference_alias_ptr_type (DR_REF (first_dr));
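/* For instance, if two members of the interleaving group are accessed
   through types with different alias sets, the group conservatively uses
   ptr_type_node (alias set 0) as the common alias pointer type.  */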
6188 /* Function scan_operand_equal_p.
6190 Helper function for check_scan_store. Compare two references
6191 with .GOMP_SIMD_LANE bases. */
6193 static bool
6194 scan_operand_equal_p (tree ref1, tree ref2)
6196 tree ref[2] = { ref1, ref2 };
6197 poly_int64 bitsize[2], bitpos[2];
6198 tree offset[2], base[2];
6199 for (int i = 0; i < 2; ++i)
6201 machine_mode mode;
6202 int unsignedp, reversep, volatilep = 0;
6203 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6204 &offset[i], &mode, &unsignedp,
6205 &reversep, &volatilep);
6206 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6207 return false;
6208 if (TREE_CODE (base[i]) == MEM_REF
6209 && offset[i] == NULL_TREE
6210 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6212 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6213 if (is_gimple_assign (def_stmt)
6214 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6215 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6216 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6218 if (maybe_ne (mem_ref_offset (base[i]), 0))
6219 return false;
6220 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6221 offset[i] = gimple_assign_rhs2 (def_stmt);
6226 if (!operand_equal_p (base[0], base[1], 0))
6227 return false;
6228 if (maybe_ne (bitsize[0], bitsize[1]))
6229 return false;
6230 if (offset[0] != offset[1])
6232 if (!offset[0] || !offset[1])
6233 return false;
6234 if (!operand_equal_p (offset[0], offset[1], 0))
6236 tree step[2];
6237 for (int i = 0; i < 2; ++i)
6239 step[i] = integer_one_node;
6240 if (TREE_CODE (offset[i]) == SSA_NAME)
6242 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6243 if (is_gimple_assign (def_stmt)
6244 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6245 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6246 == INTEGER_CST))
6248 step[i] = gimple_assign_rhs2 (def_stmt);
6249 offset[i] = gimple_assign_rhs1 (def_stmt);
6252 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6254 step[i] = TREE_OPERAND (offset[i], 1);
6255 offset[i] = TREE_OPERAND (offset[i], 0);
6257 tree rhs1 = NULL_TREE;
6258 if (TREE_CODE (offset[i]) == SSA_NAME)
6260 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6261 if (gimple_assign_cast_p (def_stmt))
6262 rhs1 = gimple_assign_rhs1 (def_stmt);
6264 else if (CONVERT_EXPR_P (offset[i]))
6265 rhs1 = TREE_OPERAND (offset[i], 0);
6266 if (rhs1
6267 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6268 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6269 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6270 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6271 offset[i] = rhs1;
6273 if (!operand_equal_p (offset[0], offset[1], 0)
6274 || !operand_equal_p (step[0], step[1], 0))
6275 return false;
6278 return true;
6282 enum scan_store_kind {
6283 /* Normal permutation. */
6284 scan_store_kind_perm,
6286 /* Whole vector left shift permutation with zero init. */
6287 scan_store_kind_lshift_zero,
6289 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6290 scan_store_kind_lshift_cond
6293 /* Function scan_store_can_perm_p.
6295 Verify if we can perform the needed permutations or whole vector shifts.
6296 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6297 USE_WHOLE_VECTOR is a vector of enum scan_store_kind describing which
6298 operation to do at each step. */
6300 static int
6301 scan_store_can_perm_p (tree vectype, tree init,
6302 vec<enum scan_store_kind> *use_whole_vector = NULL)
6304 enum machine_mode vec_mode = TYPE_MODE (vectype);
6305 unsigned HOST_WIDE_INT nunits;
6306 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6307 return -1;
6308 int units_log2 = exact_log2 (nunits);
6309 if (units_log2 <= 0)
6310 return -1;
6312 int i;
6313 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6314 for (i = 0; i <= units_log2; ++i)
6316 unsigned HOST_WIDE_INT j, k;
6317 enum scan_store_kind kind = scan_store_kind_perm;
6318 vec_perm_builder sel (nunits, nunits, 1);
6319 sel.quick_grow (nunits);
6320 if (i == units_log2)
6322 for (j = 0; j < nunits; ++j)
6323 sel[j] = nunits - 1;
6325 else
6327 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6328 sel[j] = j;
6329 for (k = 0; j < nunits; ++j, ++k)
6330 sel[j] = nunits + k;
6332 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6333 if (!can_vec_perm_const_p (vec_mode, indices))
6335 if (i == units_log2)
6336 return -1;
6338 if (whole_vector_shift_kind == scan_store_kind_perm)
6340 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6341 return -1;
6342 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6343 /* Whole vector shifts shift in zeros, so if init is an all-zero
6344 constant, there is no need to do anything further. */
6345 if ((TREE_CODE (init) != INTEGER_CST
6346 && TREE_CODE (init) != REAL_CST)
6347 || !initializer_zerop (init))
6349 tree masktype = truth_type_for (vectype);
6350 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6351 return -1;
6352 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6355 kind = whole_vector_shift_kind;
6357 if (use_whole_vector)
6359 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6360 use_whole_vector->safe_grow_cleared (i, true);
6361 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6362 use_whole_vector->safe_push (kind);
6366 return units_log2;
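/* For example, for a constant 8-lane vector the loop above checks the
   permutations
     i = 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }
     i = 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }
     i = 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }
     i = 3:  { 7, 7, 7, 7, 7, 7, 7, 7 }   (broadcast of the last lane)
   and returns 3.  If some of the two-input permutations are not directly
   supported, a whole-vector shift (plus a VEC_COND_EXPR when INIT is not
   all zeros) is recorded in USE_WHOLE_VECTOR instead.  */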
6370 /* Function check_scan_store.
6372 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6374 static bool
6375 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6376 enum vect_def_type rhs_dt, bool slp, tree mask,
6377 vect_memory_access_type memory_access_type)
6379 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6380 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6381 tree ref_type;
6383 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6384 if (slp
6385 || mask
6386 || memory_access_type != VMAT_CONTIGUOUS
6387 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6388 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6389 || loop_vinfo == NULL
6390 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6391 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6392 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6393 || !integer_zerop (DR_INIT (dr_info->dr))
6394 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6395 || !alias_sets_conflict_p (get_alias_set (vectype),
6396 get_alias_set (TREE_TYPE (ref_type))))
6398 if (dump_enabled_p ())
6399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6400 "unsupported OpenMP scan store.\n");
6401 return false;
6404 /* We need to pattern match code built by OpenMP lowering and simplified
6405 by subsequent optimizations into something we can handle.
6406 #pragma omp simd reduction(inscan,+:r)
6407 for (...)
6409 r += something ();
6410 #pragma omp scan inclusive (r)
6411 use (r);
6413 shall have body with:
6414 // Initialization for input phase, store the reduction initializer:
6415 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6416 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6417 D.2042[_21] = 0;
6418 // Actual input phase:
6420 r.0_5 = D.2042[_20];
6421 _6 = _4 + r.0_5;
6422 D.2042[_20] = _6;
6423 // Initialization for scan phase:
6424 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6425 _26 = D.2043[_25];
6426 _27 = D.2042[_25];
6427 _28 = _26 + _27;
6428 D.2043[_25] = _28;
6429 D.2042[_25] = _28;
6430 // Actual scan phase:
6432 r.1_8 = D.2042[_20];
6434 The "omp simd array" variable D.2042 holds the privatized copy used
6435 inside of the loop and D.2043 is another one that holds copies of
6436 the current original list item. The separate GOMP_SIMD_LANE ifn
6437 kinds are there in order to allow optimizing the initializer store
6438 and combiner sequence, e.g. if it is originally some C++ish user
6439 defined reduction, but allow the vectorizer to pattern recognize it
6440 and turn into the appropriate vectorized scan.
6442 For exclusive scan, this is slightly different:
6443 #pragma omp simd reduction(inscan,+:r)
6444 for (...)
6446 use (r);
6447 #pragma omp scan exclusive (r)
6448 r += something ();
6450 shall have body with:
6451 // Initialization for input phase, store the reduction initializer:
6452 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6453 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6454 D.2042[_21] = 0;
6455 // Actual input phase:
6457 r.0_5 = D.2042[_20];
6458 _6 = _4 + r.0_5;
6459 D.2042[_20] = _6;
6460 // Initialization for scan phase:
6461 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6462 _26 = D.2043[_25];
6463 D.2044[_25] = _26;
6464 _27 = D.2042[_25];
6465 _28 = _26 + _27;
6466 D.2043[_25] = _28;
6467 // Actual scan phase:
6469 r.1_8 = D.2044[_20];
6470 ... */
6472 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6474 /* Match the D.2042[_21] = 0; store above. Just require that
6475 it is a constant or external definition store. */
6476 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6478 fail_init:
6479 if (dump_enabled_p ())
6480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6481 "unsupported OpenMP scan initializer store.\n");
6482 return false;
6485 if (! loop_vinfo->scan_map)
6486 loop_vinfo->scan_map = new hash_map<tree, tree>;
6487 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6488 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6489 if (cached)
6490 goto fail_init;
6491 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6493 /* These stores can be vectorized normally. */
6494 return true;
6497 if (rhs_dt != vect_internal_def)
6499 fail:
6500 if (dump_enabled_p ())
6501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6502 "unsupported OpenMP scan combiner pattern.\n");
6503 return false;
6506 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6507 tree rhs = gimple_assign_rhs1 (stmt);
6508 if (TREE_CODE (rhs) != SSA_NAME)
6509 goto fail;
6511 gimple *other_store_stmt = NULL;
6512 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6513 bool inscan_var_store
6514 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6516 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6518 if (!inscan_var_store)
6520 use_operand_p use_p;
6521 imm_use_iterator iter;
6522 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6524 gimple *use_stmt = USE_STMT (use_p);
6525 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6526 continue;
6527 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6528 || !is_gimple_assign (use_stmt)
6529 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6530 || other_store_stmt
6531 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6532 goto fail;
6533 other_store_stmt = use_stmt;
6535 if (other_store_stmt == NULL)
6536 goto fail;
6537 rhs = gimple_assign_lhs (other_store_stmt);
6538 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6539 goto fail;
6542 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6544 use_operand_p use_p;
6545 imm_use_iterator iter;
6546 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6548 gimple *use_stmt = USE_STMT (use_p);
6549 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6550 continue;
6551 if (other_store_stmt)
6552 goto fail;
6553 other_store_stmt = use_stmt;
6556 else
6557 goto fail;
6559 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6560 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6561 || !is_gimple_assign (def_stmt)
6562 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6563 goto fail;
6565 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6566 /* For pointer addition, we should use the normal plus for the vector
6567 operation. */
6568 switch (code)
6570 case POINTER_PLUS_EXPR:
6571 code = PLUS_EXPR;
6572 break;
6573 case MULT_HIGHPART_EXPR:
6574 goto fail;
6575 default:
6576 break;
6578 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6579 goto fail;
6581 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6582 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6583 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6584 goto fail;
6586 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6587 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6588 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6589 || !gimple_assign_load_p (load1_stmt)
6590 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6591 || !gimple_assign_load_p (load2_stmt))
6592 goto fail;
6594 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6595 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6596 if (load1_stmt_info == NULL
6597 || load2_stmt_info == NULL
6598 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6599 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6600 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6601 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6602 goto fail;
6604 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6606 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6607 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6608 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6609 goto fail;
6610 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6611 tree lrhs;
6612 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6613 lrhs = rhs1;
6614 else
6615 lrhs = rhs2;
6616 use_operand_p use_p;
6617 imm_use_iterator iter;
6618 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6620 gimple *use_stmt = USE_STMT (use_p);
6621 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6622 continue;
6623 if (other_store_stmt)
6624 goto fail;
6625 other_store_stmt = use_stmt;
6629 if (other_store_stmt == NULL)
6630 goto fail;
6631 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6632 || !gimple_store_p (other_store_stmt))
6633 goto fail;
6635 stmt_vec_info other_store_stmt_info
6636 = loop_vinfo->lookup_stmt (other_store_stmt);
6637 if (other_store_stmt_info == NULL
6638 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6639 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6640 goto fail;
6642 gimple *stmt1 = stmt;
6643 gimple *stmt2 = other_store_stmt;
6644 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6645 std::swap (stmt1, stmt2);
6646 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6647 gimple_assign_rhs1 (load2_stmt)))
6649 std::swap (rhs1, rhs2);
6650 std::swap (load1_stmt, load2_stmt);
6651 std::swap (load1_stmt_info, load2_stmt_info);
6653 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6654 gimple_assign_rhs1 (load1_stmt)))
6655 goto fail;
6657 tree var3 = NULL_TREE;
6658 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6659 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6660 gimple_assign_rhs1 (load2_stmt)))
6661 goto fail;
6662 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6664 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6665 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6666 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6667 goto fail;
6668 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6669 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6670 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6671 || lookup_attribute ("omp simd inscan exclusive",
6672 DECL_ATTRIBUTES (var3)))
6673 goto fail;
6676 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6677 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6678 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6679 goto fail;
6681 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6682 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6683 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6684 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6685 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6686 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6687 goto fail;
6689 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6690 std::swap (var1, var2);
6692 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6694 if (!lookup_attribute ("omp simd inscan exclusive",
6695 DECL_ATTRIBUTES (var1)))
6696 goto fail;
6697 var1 = var3;
6700 if (loop_vinfo->scan_map == NULL)
6701 goto fail;
6702 tree *init = loop_vinfo->scan_map->get (var1);
6703 if (init == NULL)
6704 goto fail;
6706 /* The IL is as expected; now check whether we can actually vectorize it.
6707 Inclusive scan:
6708 _26 = D.2043[_25];
6709 _27 = D.2042[_25];
6710 _28 = _26 + _27;
6711 D.2043[_25] = _28;
6712 D.2042[_25] = _28;
6713 should be vectorized as (where _40 is the vectorized rhs
6714 from the D.2042[_21] = 0; store):
6715 _30 = MEM <vector(8) int> [(int *)&D.2043];
6716 _31 = MEM <vector(8) int> [(int *)&D.2042];
6717 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6718 _33 = _31 + _32;
6719 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6720 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6721 _35 = _33 + _34;
6722 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6723 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6724 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6725 _37 = _35 + _36;
6726 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6727 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6728 _38 = _30 + _37;
6729 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6730 MEM <vector(8) int> [(int *)&D.2043] = _39;
6731 MEM <vector(8) int> [(int *)&D.2042] = _38;
6732 Exclusive scan:
6733 _26 = D.2043[_25];
6734 D.2044[_25] = _26;
6735 _27 = D.2042[_25];
6736 _28 = _26 + _27;
6737 D.2043[_25] = _28;
6738 should be vectorized as (where _40 is the vectorized rhs
6739 from the D.2042[_21] = 0; store):
6740 _30 = MEM <vector(8) int> [(int *)&D.2043];
6741 _31 = MEM <vector(8) int> [(int *)&D.2042];
6742 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6743 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6744 _34 = _32 + _33;
6745 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6746 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6747 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6748 _36 = _34 + _35;
6749 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6750 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6751 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6752 _38 = _36 + _37;
6753 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6754 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6755 _39 = _30 + _38;
6756 _50 = _31 + _39;
6757 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6758 MEM <vector(8) int> [(int *)&D.2044] = _39;
6759 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6760 enum machine_mode vec_mode = TYPE_MODE (vectype);
6761 optab optab = optab_for_tree_code (code, vectype, optab_default);
6762 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6763 goto fail;
6765 int units_log2 = scan_store_can_perm_p (vectype, *init);
6766 if (units_log2 == -1)
6767 goto fail;
6769 return true;
6773 /* Function vectorizable_scan_store.
6775 Helper of vectorizable_store, with arguments like those of vectorizable_store.
6776 Handle only the transformation, checking is done in check_scan_store. */
6778 static bool
6779 vectorizable_scan_store (vec_info *vinfo,
6780 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6781 gimple **vec_stmt, int ncopies)
6783 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6784 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6785 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6786 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6788 if (dump_enabled_p ())
6789 dump_printf_loc (MSG_NOTE, vect_location,
6790 "transform scan store. ncopies = %d\n", ncopies);
6792 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6793 tree rhs = gimple_assign_rhs1 (stmt);
6794 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6796 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6797 bool inscan_var_store
6798 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6800 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6802 use_operand_p use_p;
6803 imm_use_iterator iter;
6804 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6806 gimple *use_stmt = USE_STMT (use_p);
6807 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6808 continue;
6809 rhs = gimple_assign_lhs (use_stmt);
6810 break;
6814 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6815 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6816 if (code == POINTER_PLUS_EXPR)
6817 code = PLUS_EXPR;
6818 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6819 && commutative_tree_code (code));
6820 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6821 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6822 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6823 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6824 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6825 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6826 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6827 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6828 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6829 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6830 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6832 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6834 std::swap (rhs1, rhs2);
6835 std::swap (var1, var2);
6836 std::swap (load1_dr_info, load2_dr_info);
6839 tree *init = loop_vinfo->scan_map->get (var1);
6840 gcc_assert (init);
6842 unsigned HOST_WIDE_INT nunits;
6843 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6844 gcc_unreachable ();
6845 auto_vec<enum scan_store_kind, 16> use_whole_vector;
6846 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6847 gcc_assert (units_log2 > 0);
6848 auto_vec<tree, 16> perms;
6849 perms.quick_grow (units_log2 + 1);
6850 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6851 for (int i = 0; i <= units_log2; ++i)
6853 unsigned HOST_WIDE_INT j, k;
6854 vec_perm_builder sel (nunits, nunits, 1);
6855 sel.quick_grow (nunits);
6856 if (i == units_log2)
6857 for (j = 0; j < nunits; ++j)
6858 sel[j] = nunits - 1;
6859 else
6861 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6862 sel[j] = j;
6863 for (k = 0; j < nunits; ++j, ++k)
6864 sel[j] = nunits + k;
6866 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6867 if (!use_whole_vector.is_empty ()
6868 && use_whole_vector[i] != scan_store_kind_perm)
6870 if (zero_vec == NULL_TREE)
6871 zero_vec = build_zero_cst (vectype);
6872 if (masktype == NULL_TREE
6873 && use_whole_vector[i] == scan_store_kind_lshift_cond)
6874 masktype = truth_type_for (vectype);
6875 perms[i] = vect_gen_perm_mask_any (vectype, indices);
6877 else
6878 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
6881 tree vec_oprnd1 = NULL_TREE;
6882 tree vec_oprnd2 = NULL_TREE;
6883 tree vec_oprnd3 = NULL_TREE;
6884 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
6885 tree dataref_offset = build_int_cst (ref_type, 0);
6886 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
6887 vectype, VMAT_CONTIGUOUS);
6888 tree ldataref_ptr = NULL_TREE;
6889 tree orig = NULL_TREE;
6890 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6891 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
6892 auto_vec<tree> vec_oprnds1;
6893 auto_vec<tree> vec_oprnds2;
6894 auto_vec<tree> vec_oprnds3;
6895 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
6896 *init, &vec_oprnds1,
6897 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
6898 rhs2, &vec_oprnds3);
6899 for (int j = 0; j < ncopies; j++)
6901 vec_oprnd1 = vec_oprnds1[j];
6902 if (ldataref_ptr == NULL)
6903 vec_oprnd2 = vec_oprnds2[j];
6904 vec_oprnd3 = vec_oprnds3[j];
6905 if (j == 0)
6906 orig = vec_oprnd3;
6907 else if (!inscan_var_store)
6908 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6910 if (ldataref_ptr)
6912 vec_oprnd2 = make_ssa_name (vectype);
6913 tree data_ref = fold_build2 (MEM_REF, vectype,
6914 unshare_expr (ldataref_ptr),
6915 dataref_offset);
6916 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
6917 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
6918 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6919 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6920 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6923 tree v = vec_oprnd2;
6924 for (int i = 0; i < units_log2; ++i)
6926 tree new_temp = make_ssa_name (vectype);
6927 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
6928 (zero_vec
6929 && (use_whole_vector[i]
6930 != scan_store_kind_perm))
6931 ? zero_vec : vec_oprnd1, v,
6932 perms[i]);
6933 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6934 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6935 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6937 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
6939 /* The whole-vector shift shifted in zero bits, but if *init
6940 is not initializer_zerop, we need to replace those elements
6941 with elements from vec_oprnd1. */
6942 tree_vector_builder vb (masktype, nunits, 1);
6943 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
6944 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
6945 ? boolean_false_node : boolean_true_node);
6947 tree new_temp2 = make_ssa_name (vectype);
6948 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
6949 new_temp, vec_oprnd1);
6950 vect_finish_stmt_generation (vinfo, stmt_info,
6951 g, gsi);
6952 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6953 new_temp = new_temp2;
6956 /* For exclusive scan, perform the perms[i] permutation once
6957 more. */
6958 if (i == 0
6959 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
6960 && v == vec_oprnd2)
6962 v = new_temp;
6963 --i;
6964 continue;
6967 tree new_temp2 = make_ssa_name (vectype);
6968 g = gimple_build_assign (new_temp2, code, v, new_temp);
6969 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6970 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6972 v = new_temp2;
6975 tree new_temp = make_ssa_name (vectype);
6976 gimple *g = gimple_build_assign (new_temp, code, orig, v);
6977 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6978 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6980 tree last_perm_arg = new_temp;
6981 /* For exclusive scan, new_temp computed above is the exclusive scan
6982 prefix sum. Turn it into inclusive prefix sum for the broadcast
6983 of the last element into orig. */
6984 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6986 last_perm_arg = make_ssa_name (vectype);
6987 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
6988 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6989 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6992 orig = make_ssa_name (vectype);
6993 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
6994 last_perm_arg, perms[units_log2]);
6995 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6996 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6998 if (!inscan_var_store)
7000 tree data_ref = fold_build2 (MEM_REF, vectype,
7001 unshare_expr (dataref_ptr),
7002 dataref_offset);
7003 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7004 g = gimple_build_assign (data_ref, new_temp);
7005 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7006 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7010 if (inscan_var_store)
7011 for (int j = 0; j < ncopies; j++)
7013 if (j != 0)
7014 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7016 tree data_ref = fold_build2 (MEM_REF, vectype,
7017 unshare_expr (dataref_ptr),
7018 dataref_offset);
7019 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7020 gimple *g = gimple_build_assign (data_ref, orig);
7021 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7022 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7024 return true;
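/* In essence, for the inclusive-scan shape shown before check_scan_store
   this emits the doubling sequence of VEC_PERM_EXPR / binary-op pairs,
   the final combination with the running value from the other "omp simd
   array", and the broadcast of the last lane (perms[units_log2]) kept in
   ORIG for the stores of the inscan variable.  */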
7028 /* Function vectorizable_store.
7030 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7031 that can be vectorized.
7032 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7033 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7034 Return true if STMT_INFO is vectorizable in this way. */
7036 static bool
7037 vectorizable_store (vec_info *vinfo,
7038 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7039 gimple **vec_stmt, slp_tree slp_node,
7040 stmt_vector_for_cost *cost_vec)
7042 tree data_ref;
7043 tree op;
7044 tree vec_oprnd = NULL_TREE;
7045 tree elem_type;
7046 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7047 class loop *loop = NULL;
7048 machine_mode vec_mode;
7049 tree dummy;
7050 enum vect_def_type rhs_dt = vect_unknown_def_type;
7051 enum vect_def_type mask_dt = vect_unknown_def_type;
7052 tree dataref_ptr = NULL_TREE;
7053 tree dataref_offset = NULL_TREE;
7054 gimple *ptr_incr = NULL;
7055 int ncopies;
7056 int j;
7057 stmt_vec_info first_stmt_info;
7058 bool grouped_store;
7059 unsigned int group_size, i;
7060 vec<tree> oprnds = vNULL;
7061 vec<tree> result_chain = vNULL;
7062 tree offset = NULL_TREE;
7063 vec<tree> vec_oprnds = vNULL;
7064 bool slp = (slp_node != NULL);
7065 unsigned int vec_num;
7066 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7067 tree aggr_type;
7068 gather_scatter_info gs_info;
7069 poly_uint64 vf;
7070 vec_load_store_type vls_type;
7071 tree ref_type;
7073 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7074 return false;
7076 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7077 && ! vec_stmt)
7078 return false;
7080 /* Is vectorizable store? */
7082 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7083 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7085 tree scalar_dest = gimple_assign_lhs (assign);
7086 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7087 && is_pattern_stmt_p (stmt_info))
7088 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7089 if (TREE_CODE (scalar_dest) != ARRAY_REF
7090 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7091 && TREE_CODE (scalar_dest) != INDIRECT_REF
7092 && TREE_CODE (scalar_dest) != COMPONENT_REF
7093 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7094 && TREE_CODE (scalar_dest) != REALPART_EXPR
7095 && TREE_CODE (scalar_dest) != MEM_REF)
7096 return false;
7098 else
7100 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7101 if (!call || !gimple_call_internal_p (call))
7102 return false;
7104 internal_fn ifn = gimple_call_internal_fn (call);
7105 if (!internal_store_fn_p (ifn))
7106 return false;
7108 if (slp_node != NULL)
7110 if (dump_enabled_p ())
7111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7112 "SLP of masked stores not supported.\n");
7113 return false;
7116 int mask_index = internal_fn_mask_index (ifn);
7117 if (mask_index >= 0)
7119 mask = gimple_call_arg (call, mask_index);
7120 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7121 &mask_vectype))
7122 return false;
7126 op = vect_get_store_rhs (stmt_info);
7128 /* Cannot have hybrid store SLP -- that would mean storing to the
7129 same location twice. */
7130 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7132 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7133 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7135 if (loop_vinfo)
7137 loop = LOOP_VINFO_LOOP (loop_vinfo);
7138 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7140 else
7141 vf = 1;
7143 /* Multiple types in SLP are handled by creating the appropriate number of
7144 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7145 case of SLP. */
7146 if (slp)
7147 ncopies = 1;
7148 else
7149 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7151 gcc_assert (ncopies >= 1);
7153 /* FORNOW. This restriction should be relaxed. */
7154 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7156 if (dump_enabled_p ())
7157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7158 "multiple types in nested loop.\n");
7159 return false;
7162 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7163 op, &rhs_dt, &rhs_vectype, &vls_type))
7164 return false;
7166 elem_type = TREE_TYPE (vectype);
7167 vec_mode = TYPE_MODE (vectype);
7169 if (!STMT_VINFO_DATA_REF (stmt_info))
7170 return false;
7172 vect_memory_access_type memory_access_type;
7173 enum dr_alignment_support alignment_support_scheme;
7174 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7175 ncopies, &memory_access_type,
7176 &alignment_support_scheme, &gs_info))
7177 return false;
7179 if (mask)
7181 if (memory_access_type == VMAT_CONTIGUOUS)
7183 if (!VECTOR_MODE_P (vec_mode)
7184 || !can_vec_mask_load_store_p (vec_mode,
7185 TYPE_MODE (mask_vectype), false))
7186 return false;
7188 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7189 && (memory_access_type != VMAT_GATHER_SCATTER
7190 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7192 if (dump_enabled_p ())
7193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7194 "unsupported access type for masked store.\n");
7195 return false;
7198 else
7200 /* FORNOW. In some cases we can vectorize even if the data type is not
7201 supported (e.g. array initialization with 0). */
7202 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7203 return false;
7206 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7207 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7208 && memory_access_type != VMAT_GATHER_SCATTER
7209 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7210 if (grouped_store)
7212 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7213 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7214 group_size = DR_GROUP_SIZE (first_stmt_info);
7216 else
7218 first_stmt_info = stmt_info;
7219 first_dr_info = dr_info;
7220 group_size = vec_num = 1;
7223 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7225 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7226 memory_access_type))
7227 return false;
7230 if (!vec_stmt) /* transformation not required. */
7232 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7234 if (loop_vinfo
7235 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7236 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7237 group_size, memory_access_type,
7238 &gs_info, mask);
7240 if (slp_node
7241 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7242 vectype))
7244 if (dump_enabled_p ())
7245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7246 "incompatible vector types for invariants\n");
7247 return false;
7250 if (dump_enabled_p ()
7251 && memory_access_type != VMAT_ELEMENTWISE
7252 && memory_access_type != VMAT_GATHER_SCATTER
7253 && alignment_support_scheme != dr_aligned)
7254 dump_printf_loc (MSG_NOTE, vect_location,
7255 "Vectorizing an unaligned access.\n");
7257 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7258 vect_model_store_cost (vinfo, stmt_info, ncopies,
7259 memory_access_type, vls_type, slp_node, cost_vec);
7260 return true;
7262 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7264 /* Transform. */
7266 ensure_base_align (dr_info);
7268 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7270 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7271 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7272 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7273 tree ptr, var, scale, vec_mask;
7274 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7275 tree mask_halfvectype = mask_vectype;
7276 edge pe = loop_preheader_edge (loop);
7277 gimple_seq seq;
7278 basic_block new_bb;
7279 enum { NARROW, NONE, WIDEN } modifier;
7280 poly_uint64 scatter_off_nunits
7281 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
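/* Choose how to bridge a mismatch between the number of data elements
(NUNITS) and the number of offsets the builtin consumes: WIDEN when the
offset vector has twice as many elements as the data vector, so each
offset vector serves two data vectors and a permutation brings its high
half into place for the odd copies; NARROW when the data vector has twice
as many elements, so each data vector is scattered with two builtin calls
and NCOPIES is doubled. In both cases the permutation mask replicates the
high half, e.g. { 4, 5, 6, 7, 4, 5, 6, 7 } for eight elements. */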
7283 if (known_eq (nunits, scatter_off_nunits))
7284 modifier = NONE;
7285 else if (known_eq (nunits * 2, scatter_off_nunits))
7287 modifier = WIDEN;
7289 /* Currently gathers and scatters are only supported for
7290 fixed-length vectors. */
7291 unsigned int count = scatter_off_nunits.to_constant ();
7292 vec_perm_builder sel (count, count, 1);
7293 for (i = 0; i < (unsigned int) count; ++i)
7294 sel.quick_push (i | (count / 2));
7296 vec_perm_indices indices (sel, 1, count);
7297 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7298 indices);
7299 gcc_assert (perm_mask != NULL_TREE);
7301 else if (known_eq (nunits, scatter_off_nunits * 2))
7303 modifier = NARROW;
7305 /* Currently gathers and scatters are only supported for
7306 fixed-length vectors. */
7307 unsigned int count = nunits.to_constant ();
7308 vec_perm_builder sel (count, count, 1);
7309 for (i = 0; i < (unsigned int) count; ++i)
7310 sel.quick_push (i | (count / 2));
7312 vec_perm_indices indices (sel, 2, count);
7313 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7314 gcc_assert (perm_mask != NULL_TREE);
7315 ncopies *= 2;
7317 if (mask)
7318 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7320 else
7321 gcc_unreachable ();
7323 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7324 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7325 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7326 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7327 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7328 scaletype = TREE_VALUE (arglist);
7330 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7331 && TREE_CODE (rettype) == VOID_TYPE);
7333 ptr = fold_convert (ptrtype, gs_info.base);
7334 if (!is_gimple_min_invariant (ptr))
7336 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7337 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7338 gcc_assert (!new_bb);
7341 if (mask == NULL_TREE)
7343 mask_arg = build_int_cst (masktype, -1);
7344 mask_arg = vect_init_vector (vinfo, stmt_info,
7345 mask_arg, masktype, NULL);
7348 scale = build_int_cst (scaletype, gs_info.scale);
7350 auto_vec<tree> vec_oprnds0;
7351 auto_vec<tree> vec_oprnds1;
7352 auto_vec<tree> vec_masks;
7353 if (mask)
7355 tree mask_vectype = truth_type_for (vectype);
7356 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7357 modifier == NARROW
7358 ? ncopies / 2 : ncopies,
7359 mask, &vec_masks, mask_vectype);
7361 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7362 modifier == WIDEN
7363 ? ncopies / 2 : ncopies,
7364 gs_info.offset, &vec_oprnds0);
7365 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7366 modifier == NARROW
7367 ? ncopies / 2 : ncopies,
7368 op, &vec_oprnds1);
7369 for (j = 0; j < ncopies; ++j)
7371 if (modifier == WIDEN)
7373 if (j & 1)
7374 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7375 perm_mask, stmt_info, gsi);
7376 else
7377 op = vec_oprnd0 = vec_oprnds0[j / 2];
7378 src = vec_oprnd1 = vec_oprnds1[j];
7379 if (mask)
7380 mask_op = vec_mask = vec_masks[j];
7382 else if (modifier == NARROW)
7384 if (j & 1)
7385 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7386 perm_mask, stmt_info, gsi);
7387 else
7388 src = vec_oprnd1 = vec_oprnds1[j / 2];
7389 op = vec_oprnd0 = vec_oprnds0[j];
7390 if (mask)
7391 mask_op = vec_mask = vec_masks[j / 2];
7393 else
7395 op = vec_oprnd0 = vec_oprnds0[j];
7396 src = vec_oprnd1 = vec_oprnds1[j];
7397 if (mask)
7398 mask_op = vec_mask = vec_masks[j];
7401 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7403 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7404 TYPE_VECTOR_SUBPARTS (srctype)));
7405 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7406 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7407 gassign *new_stmt
7408 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7409 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7410 src = var;
7413 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7415 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7416 TYPE_VECTOR_SUBPARTS (idxtype)));
7417 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7418 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7419 gassign *new_stmt
7420 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7421 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7422 op = var;
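/* The builtin takes the mask as a scalar integer of type MASKTYPE. For
the NARROW case first unpack the half of the vector mask that belongs to
this copy, then view-convert the (half-)mask to a same-sized integer and
widen it to MASKTYPE if necessary. */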
7425 if (mask)
7427 tree utype;
7428 mask_arg = mask_op;
7429 if (modifier == NARROW)
7431 var = vect_get_new_ssa_name (mask_halfvectype,
7432 vect_simple_var);
7433 gassign *new_stmt
7434 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7435 : VEC_UNPACK_LO_EXPR,
7436 mask_op);
7437 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7438 mask_arg = var;
7440 tree optype = TREE_TYPE (mask_arg);
7441 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7442 utype = masktype;
7443 else
7444 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7445 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7446 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7447 gassign *new_stmt
7448 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7449 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7450 mask_arg = var;
7451 if (!useless_type_conversion_p (masktype, utype))
7453 gcc_assert (TYPE_PRECISION (utype)
7454 <= TYPE_PRECISION (masktype));
7455 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7456 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7457 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7458 mask_arg = var;
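/* Emit: GS_INFO.DECL (PTR, MASK_ARG, OP, SRC, SCALE), where OP holds the
(possibly converted) offsets and SRC the data to be scattered. */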
7462 gcall *new_stmt
7463 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7464 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7466 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7468 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7469 return true;
7471 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7472 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7474 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7475 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7477 if (grouped_store)
7479 /* FORNOW */
7480 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7482 /* We vectorize all the stmts of the interleaving group when we
7483 reach the last stmt in the group. */
7484 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7485 < DR_GROUP_SIZE (first_stmt_info)
7486 && !slp)
7488 *vec_stmt = NULL;
7489 return true;
7492 if (slp)
7494 grouped_store = false;
7495 /* VEC_NUM is the number of vect stmts to be created for this
7496 group. */
7497 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7498 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7499 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7500 == first_stmt_info);
7501 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7502 op = vect_get_store_rhs (first_stmt_info);
7504 else
7505 /* VEC_NUM is the number of vect stmts to be created for this
7506 group. */
7507 vec_num = group_size;
7509 ref_type = get_group_alias_ptr_type (first_stmt_info);
7511 else
7512 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7514 if (dump_enabled_p ())
7515 dump_printf_loc (MSG_NOTE, vect_location,
7516 "transform store. ncopies = %d\n", ncopies);
7518 if (memory_access_type == VMAT_ELEMENTWISE
7519 || memory_access_type == VMAT_STRIDED_SLP)
7521 gimple_stmt_iterator incr_gsi;
7522 bool insert_after;
7523 gimple *incr;
7524 tree offvar;
7525 tree ivstep;
7526 tree running_off;
7527 tree stride_base, stride_step, alias_off;
7528 tree vec_oprnd;
7529 tree dr_offset;
7530 unsigned int g;
7531 /* Checked by get_load_store_type. */
7532 unsigned int const_nunits = nunits.to_constant ();
7534 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7535 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7537 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7538 stride_base
7539 = fold_build_pointer_plus
7540 (DR_BASE_ADDRESS (first_dr_info->dr),
7541 size_binop (PLUS_EXPR,
7542 convert_to_ptrofftype (dr_offset),
7543 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7544 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7546 /* For a store with loop-invariant (but other than power-of-2)
7547 stride (i.e. not a grouped access) like so:
7549 for (i = 0; i < n; i += stride)
7550 array[i] = ...;
7552 we generate a new induction variable and new stores from
7553 the components of the (vectorized) rhs:
7555 for (j = 0; ; j += VF*stride)
7556 vectemp = ...;
7557 tmp1 = vectemp[0];
7558 array[j] = tmp1;
7559 tmp2 = vectemp[1];
7560 array[j + stride] = tmp2;
7564 unsigned nstores = const_nunits;
7565 unsigned lnel = 1;
7566 tree ltype = elem_type;
7567 tree lvectype = vectype;
7568 if (slp)
7570 if (group_size < const_nunits
7571 && const_nunits % group_size == 0)
7573 nstores = const_nunits / group_size;
7574 lnel = group_size;
7575 ltype = build_vector_type (elem_type, group_size);
7576 lvectype = vectype;
7578 /* First check whether the vec_extract optab cannot extract the
7579 GROUP_SIZE-element sub-vectors directly; if so, try the fallback below. */
7580 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7581 machine_mode vmode;
7582 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7583 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7584 group_size).exists (&vmode)
7585 || (convert_optab_handler (vec_extract_optab,
7586 TYPE_MODE (vectype), vmode)
7587 == CODE_FOR_nothing))
7589 /* Try to avoid emitting an extract of vector elements
7590 by performing the extracts using an integer type of the
7591 same size, extracting from a vector of those and then
7592 re-interpreting it as the original vector type if
7593 supported. */
7594 unsigned lsize
7595 = group_size * GET_MODE_BITSIZE (elmode);
7596 unsigned int lnunits = const_nunits / group_size;
7597 /* If we can't construct such a vector, fall back to
7598 element extracts from the original vector type and
7599 element-size stores. */
7600 if (int_mode_for_size (lsize, 0).exists (&elmode)
7601 && VECTOR_MODE_P (TYPE_MODE (vectype))
7602 && related_vector_mode (TYPE_MODE (vectype), elmode,
7603 lnunits).exists (&vmode)
7604 && (convert_optab_handler (vec_extract_optab,
7605 vmode, elmode)
7606 != CODE_FOR_nothing))
7608 nstores = lnunits;
7609 lnel = group_size;
7610 ltype = build_nonstandard_integer_type (lsize, 1);
7611 lvectype = build_vector_type (ltype, nstores);
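/* E.g. for a group of two HImode elements stored from a V8HI vector this
views the vector as four 32-bit integers and emits one SImode store per
group instead of two HImode stores. */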
7613 /* Else fall back to vector extraction anyway.
7614 Fewer stores are more important than avoiding spilling
7615 of the vector we extract from. Compared to the
7616 construction case in vectorizable_load no store-forwarding
7617 issue exists here for reasonable archs. */
7620 else if (group_size >= const_nunits
7621 && group_size % const_nunits == 0)
7623 nstores = 1;
7624 lnel = const_nunits;
7625 ltype = vectype;
7626 lvectype = vectype;
7628 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7629 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7632 ivstep = stride_step;
7633 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7634 build_int_cst (TREE_TYPE (ivstep), vf));
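/* The IV advances by VF scalar strides per vector loop iteration. */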
7636 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7638 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7639 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7640 create_iv (stride_base, ivstep, NULL,
7641 loop, &incr_gsi, insert_after,
7642 &offvar, NULL);
7643 incr = gsi_stmt (incr_gsi);
7645 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7647 alias_off = build_int_cst (ref_type, 0);
7648 stmt_vec_info next_stmt_info = first_stmt_info;
7649 for (g = 0; g < group_size; g++)
7651 running_off = offvar;
7652 if (g)
7654 tree size = TYPE_SIZE_UNIT (ltype);
7655 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7656 size);
7657 tree newoff = copy_ssa_name (running_off, NULL);
7658 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7659 running_off, pos);
7660 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7661 running_off = newoff;
7663 if (!slp)
7664 op = vect_get_store_rhs (next_stmt_info);
7665 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7666 op, &vec_oprnds);
7667 unsigned int group_el = 0;
7668 unsigned HOST_WIDE_INT
7669 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7670 for (j = 0; j < ncopies; j++)
7672 vec_oprnd = vec_oprnds[j];
7673 /* Pun the vector to extract from if necessary. */
7674 if (lvectype != vectype)
7676 tree tem = make_ssa_name (lvectype);
7677 gimple *pun
7678 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7679 lvectype, vec_oprnd));
7680 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7681 vec_oprnd = tem;
7683 for (i = 0; i < nstores; i++)
7685 tree newref, newoff;
7686 gimple *incr, *assign;
7687 tree size = TYPE_SIZE (ltype);
7688 /* Extract the i'th component. */
7689 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7690 bitsize_int (i), size);
7691 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7692 size, pos);
7694 elem = force_gimple_operand_gsi (gsi, elem, true,
7695 NULL_TREE, true,
7696 GSI_SAME_STMT);
7698 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7699 group_el * elsz);
7700 newref = build2 (MEM_REF, ltype,
7701 running_off, this_off);
7702 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7704 /* And store it to *running_off. */
7705 assign = gimple_build_assign (newref, elem);
7706 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7708 group_el += lnel;
7709 if (! slp
7710 || group_el == group_size)
7712 newoff = copy_ssa_name (running_off, NULL);
7713 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7714 running_off, stride_step);
7715 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7717 running_off = newoff;
7718 group_el = 0;
7720 if (g == group_size - 1
7721 && !slp)
7723 if (j == 0 && i == 0)
7724 *vec_stmt = assign;
7725 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7729 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7730 if (slp)
7731 break;
7734 vec_oprnds.release ();
7735 return true;
7738 auto_vec<tree> dr_chain (group_size);
7739 oprnds.create (group_size);
7741 /* Gather-scatter accesses perform only component accesses, so alignment
7742 is irrelevant for them. */
7743 if (memory_access_type == VMAT_GATHER_SCATTER)
7744 alignment_support_scheme = dr_unaligned_supported;
7745 else
7746 alignment_support_scheme
7747 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7749 gcc_assert (alignment_support_scheme);
7750 vec_loop_masks *loop_masks
7751 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7752 ? &LOOP_VINFO_MASKS (loop_vinfo)
7753 : NULL);
7754 vec_loop_lens *loop_lens
7755 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7756 ? &LOOP_VINFO_LENS (loop_vinfo)
7757 : NULL);
7759 /* We shouldn't use the length-based approach if the loop is fully masked. */
7760 gcc_assert (!loop_lens || !loop_masks);
7762 /* Targets with store-lane instructions must not require explicit
7763 realignment. vect_supportable_dr_alignment always returns either
7764 dr_aligned or dr_unaligned_supported for masked operations. */
7765 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7766 && !mask
7767 && !loop_masks)
7768 || alignment_support_scheme == dr_aligned
7769 || alignment_support_scheme == dr_unaligned_supported);
7771 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7772 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7773 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7775 tree bump;
7776 tree vec_offset = NULL_TREE;
7777 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7779 aggr_type = NULL_TREE;
7780 bump = NULL_TREE;
7782 else if (memory_access_type == VMAT_GATHER_SCATTER)
7784 aggr_type = elem_type;
7785 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7786 &bump, &vec_offset);
7788 else
7790 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7791 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7792 else
7793 aggr_type = vectype;
7794 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7795 memory_access_type);
7798 if (mask)
7799 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7801 /* In case the vectorization factor (VF) is bigger than the number
7802 of elements that we can fit in a vectype (nunits), we have to generate
7803 more than one vector stmt, i.e. we need to "unroll" the
7804 vector stmt by a factor of VF/nunits. */
7806 /* In case of interleaving (non-unit grouped access):
7808 S1: &base + 2 = x2
7809 S2: &base = x0
7810 S3: &base + 1 = x1
7811 S4: &base + 3 = x3
7813 We create vectorized stores starting from the base address (the access of
7814 the first stmt in the chain, S2 in the above example) when the last store
7815 stmt of the chain (S4) is reached:
7817 VS1: &base = vx2
7818 VS2: &base + vec_size*1 = vx0
7819 VS3: &base + vec_size*2 = vx1
7820 VS4: &base + vec_size*3 = vx3
7822 Then permutation statements are generated:
7824 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7825 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7828 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7829 (the order of the data-refs in the output of vect_permute_store_chain
7830 corresponds to the order of scalar stmts in the interleaving chain - see
7831 the documentation of vect_permute_store_chain()).
7833 In case of both multiple types and interleaving, above vector stores and
7834 permutation stmts are created for every copy. The result vector stmts are
7835 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7836 STMT_VINFO_RELATED_STMT for the next copies.
7839 auto_vec<tree> vec_masks;
7840 tree vec_mask = NULL;
7841 auto_vec<tree> vec_offsets;
7842 auto_vec<vec<tree> > gvec_oprnds;
7843 gvec_oprnds.safe_grow_cleared (group_size, true);
7844 for (j = 0; j < ncopies; j++)
7846 gimple *new_stmt;
7847 if (j == 0)
7849 if (slp)
7851 /* Get vectorized arguments for SLP_NODE. */
7852 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
7853 op, &vec_oprnds);
7854 vec_oprnd = vec_oprnds[0];
7856 else
7858 /* For interleaved stores we collect vectorized defs for all the
7859 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7860 used as an input to vect_permute_store_chain().
7862 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7863 and OPRNDS are of size 1. */
7864 stmt_vec_info next_stmt_info = first_stmt_info;
7865 for (i = 0; i < group_size; i++)
7867 /* Since gaps are not supported for interleaved stores,
7868 DR_GROUP_SIZE is the exact number of stmts in the chain.
7869 Therefore, NEXT_STMT_INFO can't be NULL_TREE. If there
7870 is no interleaving, DR_GROUP_SIZE is 1,
7871 and only one iteration of the loop will be executed. */
7872 op = vect_get_store_rhs (next_stmt_info);
7873 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
7874 ncopies, op, &gvec_oprnds[i]);
7875 vec_oprnd = gvec_oprnds[i][0];
7876 dr_chain.quick_push (gvec_oprnds[i][0]);
7877 oprnds.quick_push (gvec_oprnds[i][0]);
7878 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7880 if (mask)
7882 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
7883 mask, &vec_masks, mask_vectype);
7884 vec_mask = vec_masks[0];
7888 /* We should have caught mismatched types earlier. */
7889 gcc_assert (useless_type_conversion_p (vectype,
7890 TREE_TYPE (vec_oprnd)));
7891 bool simd_lane_access_p
7892 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
7893 if (simd_lane_access_p
7894 && !loop_masks
7895 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7896 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7897 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
7898 && integer_zerop (DR_INIT (first_dr_info->dr))
7899 && alias_sets_conflict_p (get_alias_set (aggr_type),
7900 get_alias_set (TREE_TYPE (ref_type))))
7902 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7903 dataref_offset = build_int_cst (ref_type, 0);
7905 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7907 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
7908 &dataref_ptr, &vec_offsets, ncopies);
7909 vec_offset = vec_offsets[0];
7911 else
7912 dataref_ptr
7913 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
7914 simd_lane_access_p ? loop : NULL,
7915 offset, &dummy, gsi, &ptr_incr,
7916 simd_lane_access_p, NULL_TREE, bump);
7918 else
7920 /* For interleaved stores we created vectorized defs for all the
7921 defs stored in OPRNDS in the previous iteration (previous copy).
7922 DR_CHAIN is then used as an input to vect_permute_store_chain().
7923 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7924 OPRNDS are of size 1. */
7925 for (i = 0; i < group_size; i++)
7927 vec_oprnd = gvec_oprnds[i][j];
7928 dr_chain[i] = gvec_oprnds[i][j];
7929 oprnds[i] = gvec_oprnds[i][j];
7931 if (mask)
7932 vec_mask = vec_masks[j];
7933 if (dataref_offset)
7934 dataref_offset
7935 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7936 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7937 vec_offset = vec_offsets[j];
7938 else
7939 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
7940 stmt_info, bump);
7943 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7945 tree vec_array;
7947 /* Get an array into which we can store the individual vectors. */
7948 vec_array = create_vector_array (vectype, vec_num);
7950 /* Invalidate the current contents of VEC_ARRAY. This should
7951 become an RTL clobber too, which prevents the vector registers
7952 from being upward-exposed. */
7953 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7955 /* Store the individual vectors into the array. */
7956 for (i = 0; i < vec_num; i++)
7958 vec_oprnd = dr_chain[i];
7959 write_vector_array (vinfo, stmt_info,
7960 gsi, vec_oprnd, vec_array, i);
7963 tree final_mask = NULL;
7964 if (loop_masks)
7965 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7966 vectype, j);
7967 if (vec_mask)
7968 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7969 vec_mask, gsi);
7971 gcall *call;
7972 if (final_mask)
7974 /* Emit:
7975 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7976 VEC_ARRAY). */
7977 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7978 tree alias_ptr = build_int_cst (ref_type, align);
7979 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7980 dataref_ptr, alias_ptr,
7981 final_mask, vec_array);
7983 else
7985 /* Emit:
7986 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7987 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7988 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7989 vec_array);
7990 gimple_call_set_lhs (call, data_ref);
7992 gimple_call_set_nothrow (call, true);
7993 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7994 new_stmt = call;
7996 /* Record that VEC_ARRAY is now dead. */
7997 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7999 else
8001 new_stmt = NULL;
8002 if (grouped_store)
8004 if (j == 0)
8005 result_chain.create (group_size);
8006 /* Permute. */
8007 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8008 gsi, &result_chain);
8011 stmt_vec_info next_stmt_info = first_stmt_info;
8012 for (i = 0; i < vec_num; i++)
8014 unsigned misalign;
8015 unsigned HOST_WIDE_INT align;
8017 tree final_mask = NULL_TREE;
8018 if (loop_masks)
8019 final_mask = vect_get_loop_mask (gsi, loop_masks,
8020 vec_num * ncopies,
8021 vectype, vec_num * j + i);
8022 if (vec_mask)
8023 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8024 vec_mask, gsi);
8026 if (memory_access_type == VMAT_GATHER_SCATTER)
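/* Emit: MASK_SCATTER_STORE (DATAREF_PTR, VEC_OFFSET, SCALE, VEC_OPRND,
FINAL_MASK) when loop masking is in use, otherwise
SCATTER_STORE (DATAREF_PTR, VEC_OFFSET, SCALE, VEC_OPRND). */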
8028 tree scale = size_int (gs_info.scale);
8029 gcall *call;
8030 if (loop_masks)
8031 call = gimple_build_call_internal
8032 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8033 scale, vec_oprnd, final_mask);
8034 else
8035 call = gimple_build_call_internal
8036 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8037 scale, vec_oprnd);
8038 gimple_call_set_nothrow (call, true);
8039 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8040 new_stmt = call;
8041 break;
8044 if (i > 0)
8045 /* Bump the vector pointer. */
8046 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8047 gsi, stmt_info, bump);
8049 if (slp)
8050 vec_oprnd = vec_oprnds[i];
8051 else if (grouped_store)
8052 /* For grouped stores vectorized defs are interleaved in
8053 vect_permute_store_chain(). */
8054 vec_oprnd = result_chain[i];
8056 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8057 if (aligned_access_p (first_dr_info))
8058 misalign = 0;
8059 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8061 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8062 misalign = 0;
8064 else
8065 misalign = DR_MISALIGNMENT (first_dr_info);
8066 if (dataref_offset == NULL_TREE
8067 && TREE_CODE (dataref_ptr) == SSA_NAME)
8068 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8069 misalign);
8071 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8073 tree perm_mask = perm_mask_for_reverse (vectype);
8074 tree perm_dest = vect_create_destination_var
8075 (vect_get_store_rhs (stmt_info), vectype);
8076 tree new_temp = make_ssa_name (perm_dest);
8078 /* Generate the permute statement. */
8079 gimple *perm_stmt
8080 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8081 vec_oprnd, perm_mask);
8082 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8084 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8085 vec_oprnd = new_temp;
8088 /* Arguments are ready. Create the new vector stmt. */
8089 if (final_mask)
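/* Emit: MASK_STORE (DATAREF_PTR, ALIAS_PTR, FINAL_MASK, VEC_OPRND). */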
8091 align = least_bit_hwi (misalign | align);
8092 tree ptr = build_int_cst (ref_type, align);
8093 gcall *call
8094 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8095 dataref_ptr, ptr,
8096 final_mask, vec_oprnd);
8097 gimple_call_set_nothrow (call, true);
8098 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8099 new_stmt = call;
8101 else if (loop_lens)
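/* Emit: LEN_STORE (DATAREF_PTR, ALIAS_PTR, FINAL_LEN, VEC_OPRND),
view-converting VEC_OPRND first if the target's len_store mode differs
from the vector mode. */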
8103 tree final_len
8104 = vect_get_loop_len (loop_vinfo, loop_lens,
8105 vec_num * ncopies, vec_num * j + i);
8106 align = least_bit_hwi (misalign | align);
8107 tree ptr = build_int_cst (ref_type, align);
8108 machine_mode vmode = TYPE_MODE (vectype);
8109 opt_machine_mode new_ovmode
8110 = get_len_load_store_mode (vmode, false);
8111 machine_mode new_vmode = new_ovmode.require ();
8112 /* We need a conversion if the len_store mode wraps the vector with VnQI. */
8113 if (vmode != new_vmode)
8115 tree new_vtype
8116 = build_vector_type_for_mode (unsigned_intQI_type_node,
8117 new_vmode);
8118 tree var
8119 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8120 vec_oprnd
8121 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8122 gassign *new_stmt
8123 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8124 vec_oprnd);
8125 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8126 gsi);
8127 vec_oprnd = var;
8129 gcall *call
8130 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8131 ptr, final_len, vec_oprnd);
8132 gimple_call_set_nothrow (call, true);
8133 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8134 new_stmt = call;
8136 else
8138 data_ref = fold_build2 (MEM_REF, vectype,
8139 dataref_ptr,
8140 dataref_offset
8141 ? dataref_offset
8142 : build_int_cst (ref_type, 0));
8143 if (aligned_access_p (first_dr_info))
8145 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8146 TREE_TYPE (data_ref)
8147 = build_aligned_type (TREE_TYPE (data_ref),
8148 align * BITS_PER_UNIT);
8149 else
8150 TREE_TYPE (data_ref)
8151 = build_aligned_type (TREE_TYPE (data_ref),
8152 TYPE_ALIGN (elem_type));
8153 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8154 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8155 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8158 if (slp)
8159 continue;
8161 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8162 if (!next_stmt_info)
8163 break;
8166 if (!slp)
8168 if (j == 0)
8169 *vec_stmt = new_stmt;
8170 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8174 for (i = 0; i < group_size; ++i)
8176 vec<tree> oprndsi = gvec_oprnds[i];
8177 oprndsi.release ();
8179 oprnds.release ();
8180 result_chain.release ();
8181 vec_oprnds.release ();
8183 return true;
8186 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8187 VECTOR_CST mask. No checks are made that the target platform supports the
8188 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8189 vect_gen_perm_mask_checked. */
8191 tree
8192 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8194 tree mask_type;
8196 poly_uint64 nunits = sel.length ();
8197 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8199 mask_type = build_vector_type (ssizetype, nunits);
8200 return vec_perm_indices_to_tree (mask_type, sel);
8203 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8204 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8206 tree
8207 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8209 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8210 return vect_gen_perm_mask_any (vectype, sel);
8213 /* Given vector variables X and Y that were generated for the scalar
8214 STMT_INFO, generate instructions to permute the vector elements of X and Y
8215 using the permutation mask MASK_VEC, insert them at *GSI and return the
8216 permuted vector variable. */
8218 static tree
8219 permute_vec_elements (vec_info *vinfo,
8220 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8221 gimple_stmt_iterator *gsi)
8223 tree vectype = TREE_TYPE (x);
8224 tree perm_dest, data_ref;
8225 gimple *perm_stmt;
8227 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8228 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8229 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8230 else
8231 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8232 data_ref = make_ssa_name (perm_dest);
8234 /* Generate the permute statement. */
8235 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8236 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8238 return data_ref;
8241 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8242 inserting them on the loop's preheader edge. Returns true if we
8243 were successful in doing so (and thus STMT_INFO can then be moved),
8244 otherwise returns false. */
8246 static bool
8247 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8249 ssa_op_iter i;
8250 tree op;
8251 bool any = false;
8253 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8255 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8256 if (!gimple_nop_p (def_stmt)
8257 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8259 /* Make sure we don't need to recurse. While we could do
8260 so in simple cases, when there are more complex use webs
8261 we don't have an easy way to preserve stmt order to fulfil
8262 dependencies within them. */
8263 tree op2;
8264 ssa_op_iter i2;
8265 if (gimple_code (def_stmt) == GIMPLE_PHI)
8266 return false;
8267 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8269 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8270 if (!gimple_nop_p (def_stmt2)
8271 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8272 return false;
8274 any = true;
8278 if (!any)
8279 return true;
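/* Otherwise hoist every loop-internal definition feeding STMT_INFO onto
the preheader edge; the checks above ensured their own operands are
already defined outside of LOOP. */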
8281 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8283 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8284 if (!gimple_nop_p (def_stmt)
8285 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8287 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8288 gsi_remove (&gsi, false);
8289 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8293 return true;
8296 /* vectorizable_load.
8298 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8299 that can be vectorized.
8300 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8301 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8302 Return true if STMT_INFO is vectorizable in this way. */
8304 static bool
8305 vectorizable_load (vec_info *vinfo,
8306 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8307 gimple **vec_stmt, slp_tree slp_node,
8308 stmt_vector_for_cost *cost_vec)
8310 tree scalar_dest;
8311 tree vec_dest = NULL;
8312 tree data_ref = NULL;
8313 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8314 class loop *loop = NULL;
8315 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8316 bool nested_in_vect_loop = false;
8317 tree elem_type;
8318 tree new_temp;
8319 machine_mode mode;
8320 tree dummy;
8321 tree dataref_ptr = NULL_TREE;
8322 tree dataref_offset = NULL_TREE;
8323 gimple *ptr_incr = NULL;
8324 int ncopies;
8325 int i, j;
8326 unsigned int group_size;
8327 poly_uint64 group_gap_adj;
8328 tree msq = NULL_TREE, lsq;
8329 tree offset = NULL_TREE;
8330 tree byte_offset = NULL_TREE;
8331 tree realignment_token = NULL_TREE;
8332 gphi *phi = NULL;
8333 vec<tree> dr_chain = vNULL;
8334 bool grouped_load = false;
8335 stmt_vec_info first_stmt_info;
8336 stmt_vec_info first_stmt_info_for_drptr = NULL;
8337 bool compute_in_loop = false;
8338 class loop *at_loop;
8339 int vec_num;
8340 bool slp = (slp_node != NULL);
8341 bool slp_perm = false;
8342 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8343 poly_uint64 vf;
8344 tree aggr_type;
8345 gather_scatter_info gs_info;
8346 tree ref_type;
8347 enum vect_def_type mask_dt = vect_unknown_def_type;
8349 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8350 return false;
8352 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8353 && ! vec_stmt)
8354 return false;
8356 if (!STMT_VINFO_DATA_REF (stmt_info))
8357 return false;
8359 /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8360 for unpermuted loads, but we get passed SLP_TREE_REPRESENTATIVE,
8361 which can be different when reduction chains were re-ordered.
8362 Now that we have figured out we're a dataref, reset stmt_info back
8363 to SLP_TREE_SCALAR_STMTS[0]. When we're SLP-only, things should be
8364 refactored in a way that maintains the dr_vec_info pointer for the
8365 relevant access explicitly. */
8366 stmt_vec_info orig_stmt_info = stmt_info;
8367 if (slp_node)
8368 stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8370 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8371 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8373 scalar_dest = gimple_assign_lhs (assign);
8374 if (TREE_CODE (scalar_dest) != SSA_NAME)
8375 return false;
8377 tree_code code = gimple_assign_rhs_code (assign);
8378 if (code != ARRAY_REF
8379 && code != BIT_FIELD_REF
8380 && code != INDIRECT_REF
8381 && code != COMPONENT_REF
8382 && code != IMAGPART_EXPR
8383 && code != REALPART_EXPR
8384 && code != MEM_REF
8385 && TREE_CODE_CLASS (code) != tcc_declaration)
8386 return false;
8388 else
8390 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8391 if (!call || !gimple_call_internal_p (call))
8392 return false;
8394 internal_fn ifn = gimple_call_internal_fn (call);
8395 if (!internal_load_fn_p (ifn))
8396 return false;
8398 scalar_dest = gimple_call_lhs (call);
8399 if (!scalar_dest)
8400 return false;
8402 int mask_index = internal_fn_mask_index (ifn);
8403 if (mask_index >= 0)
8405 mask = gimple_call_arg (call, mask_index);
8406 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8407 &mask_vectype))
8408 return false;
8412 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8413 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8415 if (loop_vinfo)
8417 loop = LOOP_VINFO_LOOP (loop_vinfo);
8418 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8419 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8421 else
8422 vf = 1;
8424 /* Multiple types in SLP are handled by creating the appropriate number of
8425 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8426 case of SLP. */
8427 if (slp)
8428 ncopies = 1;
8429 else
8430 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8432 gcc_assert (ncopies >= 1);
8434 /* FORNOW. This restriction should be relaxed. */
8435 if (nested_in_vect_loop && ncopies > 1)
8437 if (dump_enabled_p ())
8438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8439 "multiple types in nested loop.\n");
8440 return false;
8443 /* Invalidate assumptions made by dependence analysis when vectorization
8444 on the unrolled body effectively re-orders stmts. */
8445 if (ncopies > 1
8446 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8447 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8448 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8450 if (dump_enabled_p ())
8451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8452 "cannot perform implicit CSE when unrolling "
8453 "with negative dependence distance\n");
8454 return false;
8457 elem_type = TREE_TYPE (vectype);
8458 mode = TYPE_MODE (vectype);
8460 /* FORNOW. In some cases we can vectorize even if the data type is not
8461 supported (e.g. data copies). */
8462 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8464 if (dump_enabled_p ())
8465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8466 "Aligned load, but unsupported type.\n");
8467 return false;
8470 /* Check if the load is a part of an interleaving chain. */
8471 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8473 grouped_load = true;
8474 /* FORNOW */
8475 gcc_assert (!nested_in_vect_loop);
8476 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8478 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8479 group_size = DR_GROUP_SIZE (first_stmt_info);
8481 /* Refuse non-SLP vectorization of SLP-only groups. */
8482 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8484 if (dump_enabled_p ())
8485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8486 "cannot vectorize load in non-SLP mode.\n");
8487 return false;
8490 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8492 slp_perm = true;
8494 if (!loop_vinfo)
8496 /* In BB vectorization we may not actually use a loaded vector
8497 accessing elements in excess of DR_GROUP_SIZE. */
8498 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8499 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8500 unsigned HOST_WIDE_INT nunits;
8501 unsigned j, k, maxk = 0;
8502 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8503 if (k > maxk)
8504 maxk = k;
8505 tree vectype = STMT_VINFO_VECTYPE (group_info);
8506 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8507 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8509 if (dump_enabled_p ())
8510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8511 "BB vectorization with gaps at the end of "
8512 "a load is not supported\n");
8513 return false;
8517 auto_vec<tree> tem;
8518 unsigned n_perms;
8519 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8520 true, &n_perms))
8522 if (dump_enabled_p ())
8523 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8524 vect_location,
8525 "unsupported load permutation\n");
8526 return false;
8530 /* Invalidate assumptions made by dependence analysis when vectorization
8531 on the unrolled body effectively re-orders stmts. */
8532 if (!PURE_SLP_STMT (stmt_info)
8533 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8534 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8535 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8537 if (dump_enabled_p ())
8538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8539 "cannot perform implicit CSE when performing "
8540 "group loads with negative dependence distance\n");
8541 return false;
8544 else
8545 group_size = 1;
8547 vect_memory_access_type memory_access_type;
8548 enum dr_alignment_support alignment_support_scheme;
8549 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8550 ncopies, &memory_access_type,
8551 &alignment_support_scheme, &gs_info))
8552 return false;
8554 if (mask)
8556 if (memory_access_type == VMAT_CONTIGUOUS)
8558 machine_mode vec_mode = TYPE_MODE (vectype);
8559 if (!VECTOR_MODE_P (vec_mode)
8560 || !can_vec_mask_load_store_p (vec_mode,
8561 TYPE_MODE (mask_vectype), true))
8562 return false;
8564 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8565 && memory_access_type != VMAT_GATHER_SCATTER)
8567 if (dump_enabled_p ())
8568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8569 "unsupported access type for masked load.\n");
8570 return false;
8574 if (!vec_stmt) /* transformation not required. */
8576 if (!slp)
8577 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8579 if (loop_vinfo
8580 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8581 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8582 group_size, memory_access_type,
8583 &gs_info, mask);
8585 if (dump_enabled_p ()
8586 && memory_access_type != VMAT_ELEMENTWISE
8587 && memory_access_type != VMAT_GATHER_SCATTER
8588 && alignment_support_scheme != dr_aligned)
8589 dump_printf_loc (MSG_NOTE, vect_location,
8590 "Vectorizing an unaligned access.\n");
8592 STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8593 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8594 slp_node, cost_vec);
8595 return true;
8598 if (!slp)
8599 gcc_assert (memory_access_type
8600 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8602 if (dump_enabled_p ())
8603 dump_printf_loc (MSG_NOTE, vect_location,
8604 "transform load. ncopies = %d\n", ncopies);
8606 /* Transform. */
8608 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8609 ensure_base_align (dr_info);
8611 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8613 vect_build_gather_load_calls (vinfo,
8614 stmt_info, gsi, vec_stmt, &gs_info, mask);
8615 return true;
8618 if (memory_access_type == VMAT_INVARIANT)
8620 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8621 /* If we have versioned for aliasing or the loop doesn't
8622 have any data dependencies that would preclude this,
8623 then we are sure this is a loop invariant load and
8624 thus we can insert it on the preheader edge. */
8625 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8626 && !nested_in_vect_loop
8627 && hoist_defs_of_uses (stmt_info, loop));
8628 if (hoist_p)
8630 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8631 if (dump_enabled_p ())
8632 dump_printf_loc (MSG_NOTE, vect_location,
8633 "hoisting out of the vectorized loop: %G", stmt);
8634 scalar_dest = copy_ssa_name (scalar_dest);
8635 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8636 gsi_insert_on_edge_immediate
8637 (loop_preheader_edge (loop),
8638 gimple_build_assign (scalar_dest, rhs));
8640 /* These copies are all equivalent, but currently the representation
8641 requires a separate STMT_VINFO_VEC_STMT for each one. */
8642 gimple_stmt_iterator gsi2 = *gsi;
8643 gsi_next (&gsi2);
8644 for (j = 0; j < ncopies; j++)
8646 if (hoist_p)
8647 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8648 vectype, NULL);
8649 else
8650 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8651 vectype, &gsi2);
8652 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8653 if (slp)
8654 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8655 else
8657 if (j == 0)
8658 *vec_stmt = new_stmt;
8659 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8662 return true;
8665 if (memory_access_type == VMAT_ELEMENTWISE
8666 || memory_access_type == VMAT_STRIDED_SLP)
8668 gimple_stmt_iterator incr_gsi;
8669 bool insert_after;
8670 tree offvar;
8671 tree ivstep;
8672 tree running_off;
8673 vec<constructor_elt, va_gc> *v = NULL;
8674 tree stride_base, stride_step, alias_off;
8675 /* Checked by get_load_store_type. */
8676 unsigned int const_nunits = nunits.to_constant ();
8677 unsigned HOST_WIDE_INT cst_offset = 0;
8678 tree dr_offset;
8680 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8681 gcc_assert (!nested_in_vect_loop);
8683 if (grouped_load)
8685 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8686 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8688 else
8690 first_stmt_info = stmt_info;
8691 first_dr_info = dr_info;
8693 if (slp && grouped_load)
8695 group_size = DR_GROUP_SIZE (first_stmt_info);
8696 ref_type = get_group_alias_ptr_type (first_stmt_info);
8698 else
8700 if (grouped_load)
8701 cst_offset
8702 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8703 * vect_get_place_in_interleaving_chain (stmt_info,
8704 first_stmt_info));
8705 group_size = 1;
8706 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8709 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8710 stride_base
8711 = fold_build_pointer_plus
8712 (DR_BASE_ADDRESS (first_dr_info->dr),
8713 size_binop (PLUS_EXPR,
8714 convert_to_ptrofftype (dr_offset),
8715 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8716 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8718 /* For a load with loop-invariant (but other than power-of-2)
8719 stride (i.e. not a grouped access) like so:
8721 for (i = 0; i < n; i += stride)
8722 ... = array[i];
8724 we generate a new induction variable and new accesses to
8725 form a new vector (or vectors, depending on ncopies):
8727 for (j = 0; ; j += VF*stride)
8728 tmp1 = array[j];
8729 tmp2 = array[j + stride];
8731 vectemp = {tmp1, tmp2, ...}
8734 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8735 build_int_cst (TREE_TYPE (stride_step), vf));
8737 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8739 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8740 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8741 create_iv (stride_base, ivstep, NULL,
8742 loop, &incr_gsi, insert_after,
8743 &offvar, NULL);
8745 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8747 running_off = offvar;
8748 alias_off = build_int_cst (ref_type, 0);
8749 int nloads = const_nunits;
8750 int lnel = 1;
8751 tree ltype = TREE_TYPE (vectype);
8752 tree lvectype = vectype;
8753 auto_vec<tree> dr_chain;
8754 if (memory_access_type == VMAT_STRIDED_SLP)
8756 if (group_size < const_nunits)
8758 /* First check if vec_init optab supports construction from vector
8759 elts directly. Otherwise avoid emitting a constructor of
8760 vector elements by performing the loads using an integer type
8761 of the same size, constructing a vector of those and then
8762 re-interpreting it as the original vector type. This avoids a
8763 huge runtime penalty due to the general inability to perform
8764 store forwarding from smaller stores to a larger load. */
8765 tree ptype;
8766 tree vtype
8767 = vector_vector_composition_type (vectype,
8768 const_nunits / group_size,
8769 &ptype);
8770 if (vtype != NULL_TREE)
8772 nloads = const_nunits / group_size;
8773 lnel = group_size;
8774 lvectype = vtype;
8775 ltype = ptype;
8778 else
8780 nloads = 1;
8781 lnel = const_nunits;
8782 ltype = vectype;
8784 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
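/* E.g. for a V8HI vectype and GROUP_SIZE == 2 the strided-SLP case above
loads four 32-bit pieces (each either a V2HI sub-vector or an SImode
integer, whichever the target can build a vector from) and later
view-converts the constructed vector back to V8HI. */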
8786 /* Load vector(1) scalar_type directly if the vectype has just one element. */
8787 else if (nloads == 1)
8788 ltype = vectype;
8790 if (slp)
8792 /* For SLP permutation support we need to load the whole group,
8793 not only the number of vector stmts the permutation result
8794 fits in. */
8795 if (slp_perm)
8797 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8798 variable VF. */
8799 unsigned int const_vf = vf.to_constant ();
8800 ncopies = CEIL (group_size * const_vf, const_nunits);
8801 dr_chain.create (ncopies);
8803 else
8804 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8806 unsigned int group_el = 0;
8807 unsigned HOST_WIDE_INT
8808 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8809 for (j = 0; j < ncopies; j++)
8811 if (nloads > 1)
8812 vec_alloc (v, nloads);
8813 gimple *new_stmt = NULL;
8814 for (i = 0; i < nloads; i++)
8816 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8817 group_el * elsz + cst_offset);
8818 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8819 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8820 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8821 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8822 if (nloads > 1)
8823 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8824 gimple_assign_lhs (new_stmt));
8826 group_el += lnel;
8827 if (! slp
8828 || group_el == group_size)
8830 tree newoff = copy_ssa_name (running_off);
8831 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8832 running_off, stride_step);
8833 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8835 running_off = newoff;
8836 group_el = 0;
8839 if (nloads > 1)
8841 tree vec_inv = build_constructor (lvectype, v);
8842 new_temp = vect_init_vector (vinfo, stmt_info,
8843 vec_inv, lvectype, gsi);
8844 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8845 if (lvectype != vectype)
8847 new_stmt = gimple_build_assign (make_ssa_name (vectype),
8848 VIEW_CONVERT_EXPR,
8849 build1 (VIEW_CONVERT_EXPR,
8850 vectype, new_temp));
8851 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8855 if (slp)
8857 if (slp_perm)
8858 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
8859 else
8860 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8862 else
8864 if (j == 0)
8865 *vec_stmt = new_stmt;
8866 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8869 if (slp_perm)
8871 unsigned n_perms;
8872 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
8873 false, &n_perms);
8875 return true;
8878 if (memory_access_type == VMAT_GATHER_SCATTER
8879 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8880 grouped_load = false;
8882 if (grouped_load)
8884 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8885 group_size = DR_GROUP_SIZE (first_stmt_info);
8886 /* For SLP vectorization we directly vectorize a subchain
8887 without permutation. */
8888 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8889 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8890 /* For BB vectorization always use the first stmt to base
8891 the data ref pointer on. */
8892 if (bb_vinfo)
8893 first_stmt_info_for_drptr
8894 = vect_find_first_scalar_stmt_in_slp (slp_node);
8896 /* Check if the chain of loads is already vectorized. */
8897 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
8898 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8899 ??? But we can only do so if there is exactly one
8900 as we have no way to get at the rest. Leave the CSE
8901 opportunity alone.
8902 ??? With the group load eventually participating
8903 in multiple different permutations (having multiple
8904 slp nodes which refer to the same group) the CSE
8905 is even wrong code. See PR56270. */
8906 && !slp)
8908 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8909 return true;
8911 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8912 group_gap_adj = 0;
8914 /* VEC_NUM is the number of vect stmts to be created for this group. */
8915 if (slp)
8917 grouped_load = false;
8918 /* If an SLP permutation is from N elements to N elements,
8919 and if one vector holds a whole number of N, we can load
8920 the inputs to the permutation in the same way as an
8921 unpermuted sequence. In other cases we need to load the
8922 whole group, not only the number of vector stmts the
8923 permutation result fits in. */
8924 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
8925 if (slp_perm
8926 && (group_size != scalar_lanes
8927 || !multiple_p (nunits, group_size)))
8929 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8930 variable VF; see vect_transform_slp_perm_load. */
8931 unsigned int const_vf = vf.to_constant ();
8932 unsigned int const_nunits = nunits.to_constant ();
8933 vec_num = CEIL (group_size * const_vf, const_nunits);
8934 group_gap_adj = vf * group_size - nunits * vec_num;
8936 else
8938 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8939 group_gap_adj
8940 = group_size - scalar_lanes;
8943 else
8944 vec_num = group_size;
8946 ref_type = get_group_alias_ptr_type (first_stmt_info);
8948 else
8950 first_stmt_info = stmt_info;
8951 first_dr_info = dr_info;
8952 group_size = vec_num = 1;
8953 group_gap_adj = 0;
8954 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8957 gcc_assert (alignment_support_scheme);
8958 vec_loop_masks *loop_masks
8959 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8960 ? &LOOP_VINFO_MASKS (loop_vinfo)
8961 : NULL);
8962 vec_loop_lens *loop_lens
8963 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8964 ? &LOOP_VINFO_LENS (loop_vinfo)
8965 : NULL);
8967 /* Shouldn't go with length-based approach if fully masked. */
8968 gcc_assert (!loop_lens || !loop_masks);
8970 /* Targets with store-lane instructions must not require explicit
8971 realignment. vect_supportable_dr_alignment always returns either
8972 dr_aligned or dr_unaligned_supported for masked operations. */
8973 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8974 && !mask
8975 && !loop_masks)
8976 || alignment_support_scheme == dr_aligned
8977 || alignment_support_scheme == dr_unaligned_supported);
8979 /* In case the vectorization factor (VF) is bigger than the number
8980 of elements that we can fit in a vectype (nunits), we have to generate
8981 more than one vector stmt - i.e. - we need to "unroll" the
8982 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8983 from one copy of the vector stmt to the next, in the field
8984 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8985 stages to find the correct vector defs to be used when vectorizing
8986 stmts that use the defs of the current stmt. The example below
8987 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8988 need to create 4 vectorized stmts):
8990 before vectorization:
8991 RELATED_STMT VEC_STMT
8992 S1: x = memref - -
8993 S2: z = x + 1 - -
8995 step 1: vectorize stmt S1:
8996 We first create the vector stmt VS1_0, and, as usual, record a
8997 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8998 Next, we create the vector stmt VS1_1, and record a pointer to
8999 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9000 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9001 stmts and pointers:
9002 RELATED_STMT VEC_STMT
9003 VS1_0: vx0 = memref0 VS1_1 -
9004 VS1_1: vx1 = memref1 VS1_2 -
9005 VS1_2: vx2 = memref2 VS1_3 -
9006 VS1_3: vx3 = memref3 - -
9007 S1: x = load - VS1_0
9008 S2: z = x + 1 - -
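/* Editor's sketch (hand-written GNU C, not vectorizer output): with
   VF == 16 and 4-element vectors, each scalar statement above becomes
   four vector statements, mirroring the chain VS1_0..VS1_3.  */
typedef int v4si __attribute__ ((vector_size (16)));

void
add_one_vf16 (int *restrict z, const int *restrict x, int n)
{
  for (int i = 0; i < n; i += 16)                 /* one iteration per VF   */
    for (int k = 0; k < 4; k++)                   /* the VF/nunits copies   */
      {
        v4si vx = *(const v4si *) (x + i + 4 * k);  /* VS1_k: vxk = memrefk */
        *(v4si *) (z + i + 4 * k) = vx + 1;         /* vectorized S2        */
      }
}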
9011 /* In case of interleaving (non-unit grouped access):
9013 S1: x2 = &base + 2
9014 S2: x0 = &base
9015 S3: x1 = &base + 1
9016 S4: x3 = &base + 3
9018 Vectorized loads are created in the order of memory accesses
9019 starting from the access of the first stmt of the chain:
9021 VS1: vx0 = &base
9022 VS2: vx1 = &base + vec_size*1
9023 VS3: vx3 = &base + vec_size*2
9024 VS4: vx4 = &base + vec_size*3
9026 Then permutation statements are generated:
9028 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9029 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9032 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9033 (the order of the data-refs in the output of vect_permute_load_chain
9034 corresponds to the order of scalar stmts in the interleaving chain - see
9035 the documentation of vect_permute_load_chain()).
9036 The generation of permutation stmts and recording them in
9037 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9039 In case of both multiple types and interleaving, the vector loads and
9040 permutation stmts above are created for every copy. The result vector
9041 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9042 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
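/* Editor's sketch of the even/odd extraction performed by the
   VEC_PERM_EXPRs above, written with GCC's __builtin_shuffle; the helper
   name and signature are made up for the example.  */
typedef int v4si __attribute__ ((vector_size (16)));

static void
deinterleave_pairs (const int *base, v4si *even, v4si *odd)
{
  v4si vx0 = *(const v4si *) base;        /* VS1: vx0 = &base             */
  v4si vx1 = *(const v4si *) (base + 4);  /* VS2: vx1 = &base + vec_size  */
  *even = __builtin_shuffle (vx0, vx1, (v4si) { 0, 2, 4, 6 });  /* VS5 */
  *odd  = __builtin_shuffle (vx0, vx1, (v4si) { 1, 3, 5, 7 });  /* VS6 */
}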
9044 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9045 on a target that supports unaligned accesses (dr_unaligned_supported)
9046 we generate the following code:
9047 p = initial_addr;
9048 indx = 0;
9049 loop {
9050 p = p + indx * vectype_size;
9051 vec_dest = *(p);
9052 indx = indx + 1;
9055 Otherwise, the data reference is potentially unaligned on a target that
9056 does not support unaligned accesses (dr_explicit_realign_optimized) -
9057 then generate the following code, in which the data in each iteration is
9058 obtained by two vector loads, one from the previous iteration, and one
9059 from the current iteration:
9060 p1 = initial_addr;
9061 msq_init = *(floor(p1))
9062 p2 = initial_addr + VS - 1;
9063 realignment_token = call target_builtin;
9064 indx = 0;
9065 loop {
9066 p2 = p2 + indx * vectype_size
9067 lsq = *(floor(p2))
9068 vec_dest = realign_load (msq, lsq, realignment_token)
9069 indx = indx + 1;
9070 msq = lsq;
9071 } */
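/* Editor's sketch of the dr_explicit_realign_optimized scheme above in
   plain GNU C.  realign_load here is a stand-in model of the target's
   REALIGN_LOAD operation (little-endian byte order and 16-byte vectors
   assumed); it is not a GCC API.  */
#include <stdint.h>
#include <string.h>
typedef int v4si __attribute__ ((vector_size (16)));

static v4si
realign_load (v4si msq, v4si lsq, const int *addr)
{
  unsigned char buf[32];                             /* msq followed by lsq */
  memcpy (buf, &msq, 16);
  memcpy (buf + 16, &lsq, 16);
  v4si res;                                          /* take the 16 bytes   */
  memcpy (&res, buf + ((uintptr_t) addr & 15), 16);  /* starting at ADDR    */
  return res;
}

void
copy_from_unaligned (int *restrict dst, const int *src, int n)
{
  v4si msq = *(const v4si *) ((uintptr_t) src & ~(uintptr_t) 15); /* msq_init = *(floor (p1)) */
  for (int i = 0; i < n; i += 4)
    {
      const int *p2 = src + i + 3;                                   /* p2 = p + VS - 1 */
      v4si lsq = *(const v4si *) ((uintptr_t) p2 & ~(uintptr_t) 15); /* lsq = *(floor (p2)) */
      *(v4si *) (dst + i) = realign_load (msq, lsq, src + i);
      msq = lsq;                                                     /* reused next iteration */
    }
}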
9073 /* If the misalignment remains the same throughout the execution of the
9074 loop, we can create the init_addr and permutation mask at the loop
9075 preheader. Otherwise, it needs to be created inside the loop.
9076 This can only occur when vectorizing memory accesses in the inner-loop
9077 nested within an outer-loop that is being vectorized. */
9079 if (nested_in_vect_loop
9080 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9081 GET_MODE_SIZE (TYPE_MODE (vectype))))
9083 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9084 compute_in_loop = true;
9087 bool diff_first_stmt_info
9088 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9090 if ((alignment_support_scheme == dr_explicit_realign_optimized
9091 || alignment_support_scheme == dr_explicit_realign)
9092 && !compute_in_loop)
9094 /* If we have a different first_stmt_info, we can't set up realignment
9095 here, since we can't guarantee that the first_stmt_info DR has been
9096 initialized yet; use the first_stmt_info_for_drptr DR instead, bumping
9097 by the distance from the first_stmt_info DR as below. */
9098 if (!diff_first_stmt_info)
9099 msq = vect_setup_realignment (vinfo,
9100 first_stmt_info, gsi, &realignment_token,
9101 alignment_support_scheme, NULL_TREE,
9102 &at_loop);
9103 if (alignment_support_scheme == dr_explicit_realign_optimized)
9105 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9106 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9107 size_one_node);
9108 gcc_assert (!first_stmt_info_for_drptr);
9111 else
9112 at_loop = loop;
9114 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9115 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9117 tree bump;
9118 tree vec_offset = NULL_TREE;
9119 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9121 aggr_type = NULL_TREE;
9122 bump = NULL_TREE;
9124 else if (memory_access_type == VMAT_GATHER_SCATTER)
9126 aggr_type = elem_type;
9127 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9128 &bump, &vec_offset);
9130 else
9132 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9133 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9134 else
9135 aggr_type = vectype;
9136 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9137 memory_access_type);
9140 vec<tree> vec_offsets = vNULL;
9141 auto_vec<tree> vec_masks;
9142 if (mask)
9143 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9144 mask, &vec_masks, mask_vectype, NULL_TREE);
9145 tree vec_mask = NULL_TREE;
9146 poly_uint64 group_elt = 0;
9147 for (j = 0; j < ncopies; j++)
9149 /* 1. Create the vector or array pointer update chain. */
9150 if (j == 0)
9152 bool simd_lane_access_p
9153 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9154 if (simd_lane_access_p
9155 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9156 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9157 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9158 && integer_zerop (DR_INIT (first_dr_info->dr))
9159 && alias_sets_conflict_p (get_alias_set (aggr_type),
9160 get_alias_set (TREE_TYPE (ref_type)))
9161 && (alignment_support_scheme == dr_aligned
9162 || alignment_support_scheme == dr_unaligned_supported))
9164 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9165 dataref_offset = build_int_cst (ref_type, 0);
9167 else if (diff_first_stmt_info)
9169 dataref_ptr
9170 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9171 aggr_type, at_loop, offset, &dummy,
9172 gsi, &ptr_incr, simd_lane_access_p,
9173 byte_offset, bump);
9174 /* Adjust the pointer by the difference to first_stmt. */
9175 data_reference_p ptrdr
9176 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9177 tree diff
9178 = fold_convert (sizetype,
9179 size_binop (MINUS_EXPR,
9180 DR_INIT (first_dr_info->dr),
9181 DR_INIT (ptrdr)));
9182 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9183 stmt_info, diff);
9184 if (alignment_support_scheme == dr_explicit_realign)
9186 msq = vect_setup_realignment (vinfo,
9187 first_stmt_info_for_drptr, gsi,
9188 &realignment_token,
9189 alignment_support_scheme,
9190 dataref_ptr, &at_loop);
9191 gcc_assert (!compute_in_loop);
9194 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9196 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9197 &dataref_ptr, &vec_offsets, ncopies);
9198 vec_offset = vec_offsets[0];
9200 else
9201 dataref_ptr
9202 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9203 at_loop,
9204 offset, &dummy, gsi, &ptr_incr,
9205 simd_lane_access_p,
9206 byte_offset, bump);
9207 if (mask)
9208 vec_mask = vec_masks[0];
9210 else
9212 if (dataref_offset)
9213 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9214 bump);
9215 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9216 vec_offset = vec_offsets[j];
9217 else
9218 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9219 stmt_info, bump);
9220 if (mask)
9221 vec_mask = vec_masks[j];
9224 if (grouped_load || slp_perm)
9225 dr_chain.create (vec_num);
9227 gimple *new_stmt = NULL;
9228 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9230 tree vec_array;
9232 vec_array = create_vector_array (vectype, vec_num);
9234 tree final_mask = NULL_TREE;
9235 if (loop_masks)
9236 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9237 vectype, j);
9238 if (vec_mask)
9239 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9240 vec_mask, gsi);
9242 gcall *call;
9243 if (final_mask)
9245 /* Emit:
9246 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9247 VEC_MASK). */
9248 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9249 tree alias_ptr = build_int_cst (ref_type, align);
9250 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9251 dataref_ptr, alias_ptr,
9252 final_mask);
9254 else
9256 /* Emit:
9257 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9258 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9259 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9261 gimple_call_set_lhs (call, vec_array);
9262 gimple_call_set_nothrow (call, true);
9263 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9264 new_stmt = call;
9266 /* Extract each vector into an SSA_NAME. */
9267 for (i = 0; i < vec_num; i++)
9269 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9270 vec_array, i);
9271 dr_chain.quick_push (new_temp);
9274 /* Record the mapping between SSA_NAMEs and statements. */
9275 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9277 /* Record that VEC_ARRAY is now dead. */
9278 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9280 else
9282 for (i = 0; i < vec_num; i++)
9284 tree final_mask = NULL_TREE;
9285 if (loop_masks
9286 && memory_access_type != VMAT_INVARIANT)
9287 final_mask = vect_get_loop_mask (gsi, loop_masks,
9288 vec_num * ncopies,
9289 vectype, vec_num * j + i);
9290 if (vec_mask)
9291 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9292 vec_mask, gsi);
9294 if (i > 0)
9295 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9296 gsi, stmt_info, bump);
9298 /* 2. Create the vector-load in the loop. */
9299 switch (alignment_support_scheme)
9301 case dr_aligned:
9302 case dr_unaligned_supported:
9304 unsigned int misalign;
9305 unsigned HOST_WIDE_INT align;
9307 if (memory_access_type == VMAT_GATHER_SCATTER)
9309 tree zero = build_zero_cst (vectype);
9310 tree scale = size_int (gs_info.scale);
9311 gcall *call;
9312 if (loop_masks)
9313 call = gimple_build_call_internal
9314 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9315 vec_offset, scale, zero, final_mask);
9316 else
9317 call = gimple_build_call_internal
9318 (IFN_GATHER_LOAD, 4, dataref_ptr,
9319 vec_offset, scale, zero);
9320 gimple_call_set_nothrow (call, true);
9321 new_stmt = call;
9322 data_ref = NULL_TREE;
9323 break;
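/* Editor's note: a scalar C model (hypothetical helper, not a GCC
   function) of what the masked gather above computes per lane: active
   lanes load from BASE + OFFSET[i] * SCALE, inactive lanes take the
   "zero" operand.  */
static void
masked_gather_model (int *dst, const char *base, const int *offset,
                     int scale, const unsigned char *mask, int nlanes)
{
  for (int i = 0; i < nlanes; i++)
    dst[i] = mask[i]
             ? *(const int *) (base + (long) offset[i] * scale)
             : 0;                       /* the zero/else operand */
}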
9326 align =
9327 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9328 if (alignment_support_scheme == dr_aligned)
9330 gcc_assert (aligned_access_p (first_dr_info));
9331 misalign = 0;
9333 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9335 align = dr_alignment
9336 (vect_dr_behavior (vinfo, first_dr_info));
9337 misalign = 0;
9339 else
9340 misalign = DR_MISALIGNMENT (first_dr_info);
9341 if (dataref_offset == NULL_TREE
9342 && TREE_CODE (dataref_ptr) == SSA_NAME)
9343 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9344 align, misalign);
9346 if (final_mask)
9348 align = least_bit_hwi (misalign | align);
9349 tree ptr = build_int_cst (ref_type, align);
9350 gcall *call
9351 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9352 dataref_ptr, ptr,
9353 final_mask);
9354 gimple_call_set_nothrow (call, true);
9355 new_stmt = call;
9356 data_ref = NULL_TREE;
9358 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9360 tree final_len
9361 = vect_get_loop_len (loop_vinfo, loop_lens,
9362 vec_num * ncopies,
9363 vec_num * j + i);
9364 align = least_bit_hwi (misalign | align);
9365 tree ptr = build_int_cst (ref_type, align);
9366 gcall *call
9367 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9368 dataref_ptr, ptr,
9369 final_len);
9370 gimple_call_set_nothrow (call, true);
9371 new_stmt = call;
9372 data_ref = NULL_TREE;
9374 /* Need a conversion back if the load was emitted in a VnQI wrapper mode. */
9375 machine_mode vmode = TYPE_MODE (vectype);
9376 opt_machine_mode new_ovmode
9377 = get_len_load_store_mode (vmode, true);
9378 machine_mode new_vmode = new_ovmode.require ();
9379 if (vmode != new_vmode)
9381 tree qi_type = unsigned_intQI_type_node;
9382 tree new_vtype
9383 = build_vector_type_for_mode (qi_type, new_vmode);
9384 tree var = vect_get_new_ssa_name (new_vtype,
9385 vect_simple_var);
9386 gimple_set_lhs (call, var);
9387 vect_finish_stmt_generation (vinfo, stmt_info, call,
9388 gsi);
9389 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9390 new_stmt
9391 = gimple_build_assign (vec_dest,
9392 VIEW_CONVERT_EXPR, op);
9395 else
9397 tree ltype = vectype;
9398 tree new_vtype = NULL_TREE;
9399 unsigned HOST_WIDE_INT gap
9400 = DR_GROUP_GAP (first_stmt_info);
9401 unsigned int vect_align
9402 = vect_known_alignment_in_bytes (first_dr_info);
9403 unsigned int scalar_dr_size
9404 = vect_get_scalar_dr_size (first_dr_info);
9405 /* If there's no peeling for gaps but we have a gap
9406 with SLP loads, then load only the lower half of
9407 the vector. See get_group_load_store_type for
9408 when we apply this optimization. */
9409 if (slp
9410 && loop_vinfo
9411 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9412 && gap != 0
9413 && known_eq (nunits, (group_size - gap) * 2)
9414 && known_eq (nunits, group_size)
9415 && gap >= (vect_align / scalar_dr_size))
9417 tree half_vtype;
9418 new_vtype
9419 = vector_vector_composition_type (vectype, 2,
9420 &half_vtype);
9421 if (new_vtype != NULL_TREE)
9422 ltype = half_vtype;
9424 tree offset
9425 = (dataref_offset ? dataref_offset
9426 : build_int_cst (ref_type, 0));
9427 if (ltype != vectype
9428 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9430 unsigned HOST_WIDE_INT gap_offset
9431 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9432 tree gapcst = build_int_cst (ref_type, gap_offset);
9433 offset = size_binop (PLUS_EXPR, offset, gapcst);
9435 data_ref
9436 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9437 if (alignment_support_scheme == dr_aligned)
9439 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9440 TREE_TYPE (data_ref)
9441 = build_aligned_type (TREE_TYPE (data_ref),
9442 align * BITS_PER_UNIT);
9443 else
9444 TREE_TYPE (data_ref)
9445 = build_aligned_type (TREE_TYPE (data_ref),
9446 TYPE_ALIGN (elem_type));
9447 if (ltype != vectype)
9449 vect_copy_ref_info (data_ref,
9450 DR_REF (first_dr_info->dr));
9451 tree tem = make_ssa_name (ltype);
9452 new_stmt = gimple_build_assign (tem, data_ref);
9453 vect_finish_stmt_generation (vinfo, stmt_info,
9454 new_stmt, gsi);
9455 data_ref = NULL;
9456 vec<constructor_elt, va_gc> *v;
9457 vec_alloc (v, 2);
9458 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9460 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9461 build_zero_cst (ltype));
9462 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9464 else
9466 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9467 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9468 build_zero_cst (ltype));
9470 gcc_assert (new_vtype != NULL_TREE);
9471 if (new_vtype == vectype)
9472 new_stmt = gimple_build_assign (
9473 vec_dest, build_constructor (vectype, v));
9474 else
9476 tree new_vname = make_ssa_name (new_vtype);
9477 new_stmt = gimple_build_assign (
9478 new_vname, build_constructor (new_vtype, v));
9479 vect_finish_stmt_generation (vinfo, stmt_info,
9480 new_stmt, gsi);
9481 new_stmt = gimple_build_assign (
9482 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9483 new_vname));
9487 break;
9489 case dr_explicit_realign:
9491 tree ptr, bump;
9493 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9495 if (compute_in_loop)
9496 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9497 &realignment_token,
9498 dr_explicit_realign,
9499 dataref_ptr, NULL);
9501 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9502 ptr = copy_ssa_name (dataref_ptr);
9503 else
9504 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9505 // For explicit realign the target alignment should be
9506 // known at compile time.
9507 unsigned HOST_WIDE_INT align =
9508 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9509 new_stmt = gimple_build_assign
9510 (ptr, BIT_AND_EXPR, dataref_ptr,
9511 build_int_cst
9512 (TREE_TYPE (dataref_ptr),
9513 -(HOST_WIDE_INT) align));
9514 vect_finish_stmt_generation (vinfo, stmt_info,
9515 new_stmt, gsi);
9516 data_ref
9517 = build2 (MEM_REF, vectype, ptr,
9518 build_int_cst (ref_type, 0));
9519 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9520 vec_dest = vect_create_destination_var (scalar_dest,
9521 vectype);
9522 new_stmt = gimple_build_assign (vec_dest, data_ref);
9523 new_temp = make_ssa_name (vec_dest, new_stmt);
9524 gimple_assign_set_lhs (new_stmt, new_temp);
9525 gimple_move_vops (new_stmt, stmt_info->stmt);
9526 vect_finish_stmt_generation (vinfo, stmt_info,
9527 new_stmt, gsi);
9528 msq = new_temp;
9530 bump = size_binop (MULT_EXPR, vs,
9531 TYPE_SIZE_UNIT (elem_type));
9532 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9533 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9534 stmt_info, bump);
9535 new_stmt = gimple_build_assign
9536 (NULL_TREE, BIT_AND_EXPR, ptr,
9537 build_int_cst
9538 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9539 ptr = copy_ssa_name (ptr, new_stmt);
9540 gimple_assign_set_lhs (new_stmt, ptr);
9541 vect_finish_stmt_generation (vinfo, stmt_info,
9542 new_stmt, gsi);
9543 data_ref
9544 = build2 (MEM_REF, vectype, ptr,
9545 build_int_cst (ref_type, 0));
9546 break;
9548 case dr_explicit_realign_optimized:
9550 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9551 new_temp = copy_ssa_name (dataref_ptr);
9552 else
9553 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9554 // We should only be doing this if we know the target
9555 // alignment at compile time.
9556 unsigned HOST_WIDE_INT align =
9557 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9558 new_stmt = gimple_build_assign
9559 (new_temp, BIT_AND_EXPR, dataref_ptr,
9560 build_int_cst (TREE_TYPE (dataref_ptr),
9561 -(HOST_WIDE_INT) align));
9562 vect_finish_stmt_generation (vinfo, stmt_info,
9563 new_stmt, gsi);
9564 data_ref
9565 = build2 (MEM_REF, vectype, new_temp,
9566 build_int_cst (ref_type, 0));
9567 break;
9569 default:
9570 gcc_unreachable ();
9572 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9573 /* DATA_REF is null if we've already built the statement. */
9574 if (data_ref)
9576 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9577 new_stmt = gimple_build_assign (vec_dest, data_ref);
9579 new_temp = make_ssa_name (vec_dest, new_stmt);
9580 gimple_set_lhs (new_stmt, new_temp);
9581 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9583 /* 3. Handle explicit realignment if necessary/supported.
9584 Create in loop:
9585 vec_dest = realign_load (msq, lsq, realignment_token) */
9586 if (alignment_support_scheme == dr_explicit_realign_optimized
9587 || alignment_support_scheme == dr_explicit_realign)
9589 lsq = gimple_assign_lhs (new_stmt);
9590 if (!realignment_token)
9591 realignment_token = dataref_ptr;
9592 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9593 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9594 msq, lsq, realignment_token);
9595 new_temp = make_ssa_name (vec_dest, new_stmt);
9596 gimple_assign_set_lhs (new_stmt, new_temp);
9597 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9599 if (alignment_support_scheme == dr_explicit_realign_optimized)
9601 gcc_assert (phi);
9602 if (i == vec_num - 1 && j == ncopies - 1)
9603 add_phi_arg (phi, lsq,
9604 loop_latch_edge (containing_loop),
9605 UNKNOWN_LOCATION);
9606 msq = lsq;
9610 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9612 tree perm_mask = perm_mask_for_reverse (vectype);
9613 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9614 perm_mask, stmt_info, gsi);
9615 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9618 /* Collect vector loads and later create their permutation in
9619 vect_transform_grouped_load (). */
9620 if (grouped_load || slp_perm)
9621 dr_chain.quick_push (new_temp);
9623 /* Store vector loads in the corresponding SLP_NODE. */
9624 if (slp && !slp_perm)
9625 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9627 /* With an SLP permutation we load the gaps as well; without one we
9628 need to skip the gaps after we manage to fully load all the
9629 elements. group_gap_adj is DR_GROUP_SIZE here. */
9630 group_elt += nunits;
9631 if (maybe_ne (group_gap_adj, 0U)
9632 && !slp_perm
9633 && known_eq (group_elt, group_size - group_gap_adj))
9635 poly_wide_int bump_val
9636 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9637 * group_gap_adj);
9638 tree bump = wide_int_to_tree (sizetype, bump_val);
9639 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9640 gsi, stmt_info, bump);
9641 group_elt = 0;
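/* Editor's worked example (made-up numbers): for an SLP group of 4 ints
   of which only scalar_lanes == 3 are used, group_gap_adj == 1, so once
   group_elt reaches group_size - group_gap_adj == 3 the data pointer is
   bumped by 1 * sizeof (int) == 4 bytes to skip the unused element.  */
static const int *
skip_group_gap (const int *ptr, unsigned group_size, unsigned scalar_lanes)
{
  unsigned group_gap_adj = group_size - scalar_lanes;   /* e.g. 4 - 3 == 1  */
  return ptr + group_gap_adj;                           /* the bump above   */
}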
9644 /* Bump the vector pointer to account for a gap or for excess
9645 elements loaded for a permuted SLP load. */
9646 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9648 poly_wide_int bump_val
9649 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9650 * group_gap_adj);
9651 tree bump = wide_int_to_tree (sizetype, bump_val);
9652 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9653 stmt_info, bump);
9657 if (slp && !slp_perm)
9658 continue;
9660 if (slp_perm)
9662 unsigned n_perms;
9663 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9664 gsi, vf, false, &n_perms);
9665 gcc_assert (ok);
9667 else
9669 if (grouped_load)
9671 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9672 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9673 group_size, gsi);
9674 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9676 else
9678 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9681 dr_chain.release ();
9683 if (!slp)
9684 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9686 return true;
9689 /* Function vect_is_simple_cond.
9691 Input:
9692 VINFO - the vec_info of the loop or basic block that is being vectorized.
9693 COND - Condition that is checked for simple use.
9695 Output:
9696 *COMP_VECTYPE - the vector type for the comparison.
9697 *DTS - The def types for the arguments of the comparison
9699 Returns whether a COND can be vectorized. Checks whether
9700 condition operands are supportable using vect_is_simple_use. */
9702 static bool
9703 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9704 slp_tree slp_node, tree *comp_vectype,
9705 enum vect_def_type *dts, tree vectype)
9707 tree lhs, rhs;
9708 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9709 slp_tree slp_op;
9711 /* Mask case. */
9712 if (TREE_CODE (cond) == SSA_NAME
9713 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9715 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9716 &slp_op, &dts[0], comp_vectype)
9717 || !*comp_vectype
9718 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9719 return false;
9720 return true;
9723 if (!COMPARISON_CLASS_P (cond))
9724 return false;
9726 lhs = TREE_OPERAND (cond, 0);
9727 rhs = TREE_OPERAND (cond, 1);
9729 if (TREE_CODE (lhs) == SSA_NAME)
9731 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9732 &lhs, &slp_op, &dts[0], &vectype1))
9733 return false;
9735 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9736 || TREE_CODE (lhs) == FIXED_CST)
9737 dts[0] = vect_constant_def;
9738 else
9739 return false;
9741 if (TREE_CODE (rhs) == SSA_NAME)
9743 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9744 &rhs, &slp_op, &dts[1], &vectype2))
9745 return false;
9747 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9748 || TREE_CODE (rhs) == FIXED_CST)
9749 dts[1] = vect_constant_def;
9750 else
9751 return false;
9753 if (vectype1 && vectype2
9754 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9755 TYPE_VECTOR_SUBPARTS (vectype2)))
9756 return false;
9758 *comp_vectype = vectype1 ? vectype1 : vectype2;
9759 /* Invariant comparison. */
9760 if (! *comp_vectype)
9762 tree scalar_type = TREE_TYPE (lhs);
9763 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9764 *comp_vectype = truth_type_for (vectype);
9765 else
9767 /* If we can widen the comparison to match vectype do so. */
9768 if (INTEGRAL_TYPE_P (scalar_type)
9769 && !slp_node
9770 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9771 TYPE_SIZE (TREE_TYPE (vectype))))
9772 scalar_type = build_nonstandard_integer_type
9773 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
9774 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9775 slp_node);
9779 return true;
9782 /* vectorizable_condition.
9784 Check if STMT_INFO is conditional modify expression that can be vectorized.
9785 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9786 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9787 at GSI.
9789 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9791 Return true if STMT_INFO is vectorizable in this way. */
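/* Editor's sketch (GNU C, hand-written): the scalar COND_EXPR shape
   handled by this function, plus an equivalent lane-wise mask select
   showing what the emitted VEC_COND_EXPR computes.  */
typedef int v4si __attribute__ ((vector_size (16)));

int
cond_scalar (int a, int b, int c, int d)
{
  return a < b ? c : d;                 /* the COND_EXPR */
}

v4si
cond_vector (v4si a, v4si b, v4si c, v4si d)
{
  v4si m = a < b;                       /* comparison mask: 0 or -1 per lane */
  return (m & c) | (~m & d);            /* lane-wise select                  */
}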
9793 static bool
9794 vectorizable_condition (vec_info *vinfo,
9795 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9796 gimple **vec_stmt,
9797 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9799 tree scalar_dest = NULL_TREE;
9800 tree vec_dest = NULL_TREE;
9801 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9802 tree then_clause, else_clause;
9803 tree comp_vectype = NULL_TREE;
9804 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9805 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9806 tree vec_compare;
9807 tree new_temp;
9808 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9809 enum vect_def_type dts[4]
9810 = {vect_unknown_def_type, vect_unknown_def_type,
9811 vect_unknown_def_type, vect_unknown_def_type};
9812 int ndts = 4;
9813 int ncopies;
9814 int vec_num;
9815 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9816 int i;
9817 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9818 vec<tree> vec_oprnds0 = vNULL;
9819 vec<tree> vec_oprnds1 = vNULL;
9820 vec<tree> vec_oprnds2 = vNULL;
9821 vec<tree> vec_oprnds3 = vNULL;
9822 tree vec_cmp_type;
9823 bool masked = false;
9825 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9826 return false;
9828 /* Is vectorizable conditional operation? */
9829 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9830 if (!stmt)
9831 return false;
9833 code = gimple_assign_rhs_code (stmt);
9834 if (code != COND_EXPR)
9835 return false;
9837 stmt_vec_info reduc_info = NULL;
9838 int reduc_index = -1;
9839 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9840 bool for_reduction
9841 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9842 if (for_reduction)
9844 if (STMT_SLP_TYPE (stmt_info))
9845 return false;
9846 reduc_info = info_for_reduction (vinfo, stmt_info);
9847 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9848 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9849 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9850 || reduc_index != -1);
9852 else
9854 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9855 return false;
9858 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9859 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9861 if (slp_node)
9863 ncopies = 1;
9864 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9866 else
9868 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9869 vec_num = 1;
9872 gcc_assert (ncopies >= 1);
9873 if (for_reduction && ncopies > 1)
9874 return false; /* FORNOW */
9876 cond_expr = gimple_assign_rhs1 (stmt);
9878 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
9879 &comp_vectype, &dts[0], vectype)
9880 || !comp_vectype)
9881 return false;
9883 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
9884 slp_tree then_slp_node, else_slp_node;
9885 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
9886 &then_clause, &then_slp_node, &dts[2], &vectype1))
9887 return false;
9888 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
9889 &else_clause, &else_slp_node, &dts[3], &vectype2))
9890 return false;
9892 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9893 return false;
9895 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9896 return false;
9898 masked = !COMPARISON_CLASS_P (cond_expr);
9899 vec_cmp_type = truth_type_for (comp_vectype);
9901 if (vec_cmp_type == NULL_TREE)
9902 return false;
9904 cond_code = TREE_CODE (cond_expr);
9905 if (!masked)
9907 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9908 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9911 /* For conditional reductions, the "then" value needs to be the candidate
9912 value calculated by this iteration while the "else" value needs to be
9913 the result carried over from previous iterations. If the COND_EXPR
9914 is the other way around, we need to swap it. */
9915 bool must_invert_cmp_result = false;
9916 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
9918 if (masked)
9919 must_invert_cmp_result = true;
9920 else
9922 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
9923 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
9924 if (new_code == ERROR_MARK)
9925 must_invert_cmp_result = true;
9926 else
9928 cond_code = new_code;
9929 /* Make sure we don't accidentally use the old condition. */
9930 cond_expr = NULL_TREE;
9933 std::swap (then_clause, else_clause);
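/* Editor's example (illustrative only) of the scalar shape behind the
   swap above: the carried reduction value sits in the "then" slot, so the
   operands are exchanged (and the comparison inverted) before the
   vectorized select is emitted.  */
int
last_matching_index (const int *a, const int *b, int n)
{
  int last = -1;
  for (int i = 0; i < n; i++)
    last = (a[i] > b[i]) ? last : i;    /* carried value in the "then" slot */
  return last;
}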
9936 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
9938 /* Boolean values may have another representation in vectors
9939 and therefore we prefer bit operations over comparison for
9940 them (which also works for scalar masks). We store opcodes
9941 to use in bitop1 and bitop2. Statement is vectorized as
9942 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9943 depending on bitop1 and bitop2 arity. */
9944 switch (cond_code)
9946 case GT_EXPR:
9947 bitop1 = BIT_NOT_EXPR;
9948 bitop2 = BIT_AND_EXPR;
9949 break;
9950 case GE_EXPR:
9951 bitop1 = BIT_NOT_EXPR;
9952 bitop2 = BIT_IOR_EXPR;
9953 break;
9954 case LT_EXPR:
9955 bitop1 = BIT_NOT_EXPR;
9956 bitop2 = BIT_AND_EXPR;
9957 std::swap (cond_expr0, cond_expr1);
9958 break;
9959 case LE_EXPR:
9960 bitop1 = BIT_NOT_EXPR;
9961 bitop2 = BIT_IOR_EXPR;
9962 std::swap (cond_expr0, cond_expr1);
9963 break;
9964 case NE_EXPR:
9965 bitop1 = BIT_XOR_EXPR;
9966 break;
9967 case EQ_EXPR:
9968 bitop1 = BIT_XOR_EXPR;
9969 bitop2 = BIT_NOT_EXPR;
9970 break;
9971 default:
9972 return false;
9974 cond_code = SSA_NAME;
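/* Editor's illustration (hypothetical helpers): treating each bit of an
   unsigned as one lane's boolean, the comparisons above lower to
   rhs1 BITOP2 (BITOP1 rhs2) exactly as selected in the switch.  */
static inline unsigned mask_gt (unsigned a, unsigned b) { return a & ~b; }   /* GT_EXPR */
static inline unsigned mask_ge (unsigned a, unsigned b) { return a | ~b; }   /* GE_EXPR */
static inline unsigned mask_ne (unsigned a, unsigned b) { return a ^ b; }    /* NE_EXPR */
static inline unsigned mask_eq (unsigned a, unsigned b) { return ~(a ^ b); } /* EQ_EXPR */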
9977 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
9978 && reduction_type == EXTRACT_LAST_REDUCTION
9979 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
9981 if (dump_enabled_p ())
9982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9983 "reduction comparison operation not supported.\n");
9984 return false;
9987 if (!vec_stmt)
9989 if (bitop1 != NOP_EXPR)
9991 machine_mode mode = TYPE_MODE (comp_vectype);
9992 optab optab;
9994 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
9995 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9996 return false;
9998 if (bitop2 != NOP_EXPR)
10000 optab = optab_for_tree_code (bitop2, comp_vectype,
10001 optab_default);
10002 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10003 return false;
10007 vect_cost_for_stmt kind = vector_stmt;
10008 if (reduction_type == EXTRACT_LAST_REDUCTION)
10009 /* Count one reduction-like operation per vector. */
10010 kind = vec_to_scalar;
10011 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10012 return false;
10014 if (slp_node
10015 && (!vect_maybe_update_slp_op_vectype
10016 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10017 || (op_adjust == 1
10018 && !vect_maybe_update_slp_op_vectype
10019 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10020 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10021 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10023 if (dump_enabled_p ())
10024 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10025 "incompatible vector types for invariants\n");
10026 return false;
10029 if (loop_vinfo && for_reduction
10030 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10032 if (reduction_type == EXTRACT_LAST_REDUCTION)
10033 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10034 ncopies * vec_num, vectype, NULL);
10035 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10036 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10038 if (dump_enabled_p ())
10039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10040 "conditional reduction prevents the use"
10041 " of partial vectors.\n");
10042 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10046 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10047 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10048 cost_vec, kind);
10049 return true;
10052 /* Transform. */
10054 if (!slp_node)
10056 vec_oprnds0.create (1);
10057 vec_oprnds1.create (1);
10058 vec_oprnds2.create (1);
10059 vec_oprnds3.create (1);
10062 /* Handle def. */
10063 scalar_dest = gimple_assign_lhs (stmt);
10064 if (reduction_type != EXTRACT_LAST_REDUCTION)
10065 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10067 bool swap_cond_operands = false;
10069 /* See whether another part of the vectorized code applies a loop
10070 mask to the condition, or to its inverse. */
10072 vec_loop_masks *masks = NULL;
10073 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10075 if (reduction_type == EXTRACT_LAST_REDUCTION)
10076 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10077 else
10079 scalar_cond_masked_key cond (cond_expr, ncopies);
10080 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10081 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10082 else
10084 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10085 cond.code = invert_tree_comparison (cond.code, honor_nans);
10086 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10088 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10089 cond_code = cond.code;
10090 swap_cond_operands = true;
10096 /* Handle cond expr. */
10097 if (masked)
10098 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10099 cond_expr, &vec_oprnds0, comp_vectype,
10100 then_clause, &vec_oprnds2, vectype,
10101 reduction_type != EXTRACT_LAST_REDUCTION
10102 ? else_clause : NULL, &vec_oprnds3, vectype);
10103 else
10104 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10105 cond_expr0, &vec_oprnds0, comp_vectype,
10106 cond_expr1, &vec_oprnds1, comp_vectype,
10107 then_clause, &vec_oprnds2, vectype,
10108 reduction_type != EXTRACT_LAST_REDUCTION
10109 ? else_clause : NULL, &vec_oprnds3, vectype);
10111 /* Arguments are ready. Create the new vector stmt. */
10112 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10114 vec_then_clause = vec_oprnds2[i];
10115 if (reduction_type != EXTRACT_LAST_REDUCTION)
10116 vec_else_clause = vec_oprnds3[i];
10118 if (swap_cond_operands)
10119 std::swap (vec_then_clause, vec_else_clause);
10121 if (masked)
10122 vec_compare = vec_cond_lhs;
10123 else
10125 vec_cond_rhs = vec_oprnds1[i];
10126 if (bitop1 == NOP_EXPR)
10128 gimple_seq stmts = NULL;
10129 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10130 vec_cond_lhs, vec_cond_rhs);
10131 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10133 else
10135 new_temp = make_ssa_name (vec_cmp_type);
10136 gassign *new_stmt;
10137 if (bitop1 == BIT_NOT_EXPR)
10138 new_stmt = gimple_build_assign (new_temp, bitop1,
10139 vec_cond_rhs);
10140 else
10141 new_stmt
10142 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10143 vec_cond_rhs);
10144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10145 if (bitop2 == NOP_EXPR)
10146 vec_compare = new_temp;
10147 else if (bitop2 == BIT_NOT_EXPR)
10149 /* Instead of doing ~x ? y : z do x ? z : y. */
10150 vec_compare = new_temp;
10151 std::swap (vec_then_clause, vec_else_clause);
10153 else
10155 vec_compare = make_ssa_name (vec_cmp_type);
10156 new_stmt
10157 = gimple_build_assign (vec_compare, bitop2,
10158 vec_cond_lhs, new_temp);
10159 vect_finish_stmt_generation (vinfo, stmt_info,
10160 new_stmt, gsi);
10165 /* If we decided to apply a loop mask to the result of the vector
10166 comparison, AND the comparison with the mask now. Later passes
10167 should then be able to reuse the AND results between multiple
10168 vector statements.
10170 For example:
10171 for (int i = 0; i < 100; ++i)
10172 x[i] = y[i] ? z[i] : 10;
10174 results in following optimized GIMPLE:
10176 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10177 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10178 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10179 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10180 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10181 vect_iftmp.11_47, { 10, ... }>;
10183 instead of using masked and unmasked forms of
10184 vec != { 0, ... } (masked in the MASK_LOAD,
10185 unmasked in the VEC_COND_EXPR). */
10187 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10188 in cases where that's necessary. */
10190 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10192 if (!is_gimple_val (vec_compare))
10194 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10195 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10196 vec_compare);
10197 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10198 vec_compare = vec_compare_name;
10201 if (must_invert_cmp_result)
10203 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10204 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10205 BIT_NOT_EXPR,
10206 vec_compare);
10207 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10208 vec_compare = vec_compare_name;
10211 if (masks)
10213 unsigned vec_num = vec_oprnds0.length ();
10214 tree loop_mask
10215 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10216 vectype, i);
10217 tree tmp2 = make_ssa_name (vec_cmp_type);
10218 gassign *g
10219 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10220 loop_mask);
10221 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10222 vec_compare = tmp2;
10226 gimple *new_stmt;
10227 if (reduction_type == EXTRACT_LAST_REDUCTION)
10229 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10230 tree lhs = gimple_get_lhs (old_stmt);
10231 new_stmt = gimple_build_call_internal
10232 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10233 vec_then_clause);
10234 gimple_call_set_lhs (new_stmt, lhs);
10235 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10236 if (old_stmt == gsi_stmt (*gsi))
10237 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10238 else
10240 /* In this case we're moving the definition to later in the
10241 block. That doesn't matter because the only uses of the
10242 lhs are in phi statements. */
10243 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10244 gsi_remove (&old_gsi, true);
10245 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10248 else
10250 new_temp = make_ssa_name (vec_dest);
10251 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10252 vec_then_clause, vec_else_clause);
10253 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10255 if (slp_node)
10256 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10257 else
10258 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10261 if (!slp_node)
10262 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10264 vec_oprnds0.release ();
10265 vec_oprnds1.release ();
10266 vec_oprnds2.release ();
10267 vec_oprnds3.release ();
10269 return true;
10272 /* vectorizable_comparison.
10274 Check if STMT_INFO is comparison expression that can be vectorized.
10275 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10276 comparison, put it in VEC_STMT, and insert it at GSI.
10278 Return true if STMT_INFO is vectorizable in this way. */
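/* Editor's sketch (GNU C) of the statement shape handled here: a
   comparison whose vectorized result is a boolean mask; with GCC's
   vector extensions the comparison below yields 0 or -1 per lane.  */
typedef int v4si __attribute__ ((vector_size (16)));

v4si
less_than_mask (v4si a, v4si b)
{
  return a < b;                         /* tcc_comparison producing a mask */
}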
10280 static bool
10281 vectorizable_comparison (vec_info *vinfo,
10282 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10283 gimple **vec_stmt,
10284 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10286 tree lhs, rhs1, rhs2;
10287 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10288 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10289 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10290 tree new_temp;
10291 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10292 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10293 int ndts = 2;
10294 poly_uint64 nunits;
10295 int ncopies;
10296 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10297 int i;
10298 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10299 vec<tree> vec_oprnds0 = vNULL;
10300 vec<tree> vec_oprnds1 = vNULL;
10301 tree mask_type;
10302 tree mask;
10304 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10305 return false;
10307 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10308 return false;
10310 mask_type = vectype;
10311 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10313 if (slp_node)
10314 ncopies = 1;
10315 else
10316 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10318 gcc_assert (ncopies >= 1);
10319 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10320 return false;
10322 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10323 if (!stmt)
10324 return false;
10326 code = gimple_assign_rhs_code (stmt);
10328 if (TREE_CODE_CLASS (code) != tcc_comparison)
10329 return false;
10331 slp_tree slp_rhs1, slp_rhs2;
10332 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10333 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10334 return false;
10336 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10337 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10338 return false;
10340 if (vectype1 && vectype2
10341 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10342 TYPE_VECTOR_SUBPARTS (vectype2)))
10343 return false;
10345 vectype = vectype1 ? vectype1 : vectype2;
10347 /* Invariant comparison. */
10348 if (!vectype)
10350 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10351 vectype = mask_type;
10352 else
10353 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10354 slp_node);
10355 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10356 return false;
10358 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10359 return false;
10361 /* Can't compare mask and non-mask types. */
10362 if (vectype1 && vectype2
10363 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10364 return false;
10366 /* Boolean values may have another representation in vectors
10367 and therefore we prefer bit operations over comparison for
10368 them (which also works for scalar masks). We store opcodes
10369 to use in bitop1 and bitop2. Statement is vectorized as
10370 BITOP2 (rhs1 BITOP1 rhs2) or
10371 rhs1 BITOP2 (BITOP1 rhs2)
10372 depending on bitop1 and bitop2 arity. */
10373 bool swap_p = false;
10374 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10376 if (code == GT_EXPR)
10378 bitop1 = BIT_NOT_EXPR;
10379 bitop2 = BIT_AND_EXPR;
10381 else if (code == GE_EXPR)
10383 bitop1 = BIT_NOT_EXPR;
10384 bitop2 = BIT_IOR_EXPR;
10386 else if (code == LT_EXPR)
10388 bitop1 = BIT_NOT_EXPR;
10389 bitop2 = BIT_AND_EXPR;
10390 swap_p = true;
10392 else if (code == LE_EXPR)
10394 bitop1 = BIT_NOT_EXPR;
10395 bitop2 = BIT_IOR_EXPR;
10396 swap_p = true;
10398 else
10400 bitop1 = BIT_XOR_EXPR;
10401 if (code == EQ_EXPR)
10402 bitop2 = BIT_NOT_EXPR;
10406 if (!vec_stmt)
10408 if (bitop1 == NOP_EXPR)
10410 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10411 return false;
10413 else
10415 machine_mode mode = TYPE_MODE (vectype);
10416 optab optab;
10418 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10419 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10420 return false;
10422 if (bitop2 != NOP_EXPR)
10424 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10425 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10426 return false;
10430 /* Put types on constant and invariant SLP children. */
10431 if (slp_node
10432 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10433 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10435 if (dump_enabled_p ())
10436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10437 "incompatible vector types for invariants\n");
10438 return false;
10441 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10442 vect_model_simple_cost (vinfo, stmt_info,
10443 ncopies * (1 + (bitop2 != NOP_EXPR)),
10444 dts, ndts, slp_node, cost_vec);
10445 return true;
10448 /* Transform. */
10449 if (!slp_node)
10451 vec_oprnds0.create (1);
10452 vec_oprnds1.create (1);
10455 /* Handle def. */
10456 lhs = gimple_assign_lhs (stmt);
10457 mask = vect_create_destination_var (lhs, mask_type);
10459 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10460 rhs1, &vec_oprnds0, vectype,
10461 rhs2, &vec_oprnds1, vectype);
10462 if (swap_p)
10463 std::swap (vec_oprnds0, vec_oprnds1);
10465 /* Arguments are ready. Create the new vector stmt. */
10466 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10468 gimple *new_stmt;
10469 vec_rhs2 = vec_oprnds1[i];
10471 new_temp = make_ssa_name (mask);
10472 if (bitop1 == NOP_EXPR)
10474 new_stmt = gimple_build_assign (new_temp, code,
10475 vec_rhs1, vec_rhs2);
10476 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10478 else
10480 if (bitop1 == BIT_NOT_EXPR)
10481 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10482 else
10483 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10484 vec_rhs2);
10485 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10486 if (bitop2 != NOP_EXPR)
10488 tree res = make_ssa_name (mask);
10489 if (bitop2 == BIT_NOT_EXPR)
10490 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10491 else
10492 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10493 new_temp);
10494 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10497 if (slp_node)
10498 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10499 else
10500 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10503 if (!slp_node)
10504 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10506 vec_oprnds0.release ();
10507 vec_oprnds1.release ();
10509 return true;
10512 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10513 can handle all live statements in the node. Otherwise return true
10514 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10515 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10517 static bool
10518 can_vectorize_live_stmts (vec_info *vinfo,
10519 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10520 slp_tree slp_node, slp_instance slp_node_instance,
10521 bool vec_stmt_p,
10522 stmt_vector_for_cost *cost_vec)
10524 if (slp_node)
10526 stmt_vec_info slp_stmt_info;
10527 unsigned int i;
10528 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10530 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10531 && !vectorizable_live_operation (vinfo,
10532 slp_stmt_info, gsi, slp_node,
10533 slp_node_instance, i,
10534 vec_stmt_p, cost_vec))
10535 return false;
10538 else if (STMT_VINFO_LIVE_P (stmt_info)
10539 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10540 slp_node, slp_node_instance, -1,
10541 vec_stmt_p, cost_vec))
10542 return false;
10544 return true;
10547 /* Make sure the statement is vectorizable. */
10549 opt_result
10550 vect_analyze_stmt (vec_info *vinfo,
10551 stmt_vec_info stmt_info, bool *need_to_vectorize,
10552 slp_tree node, slp_instance node_instance,
10553 stmt_vector_for_cost *cost_vec)
10555 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10556 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10557 bool ok;
10558 gimple_seq pattern_def_seq;
10560 if (dump_enabled_p ())
10561 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10562 stmt_info->stmt);
10564 if (gimple_has_volatile_ops (stmt_info->stmt))
10565 return opt_result::failure_at (stmt_info->stmt,
10566 "not vectorized:"
10567 " stmt has volatile operands: %G\n",
10568 stmt_info->stmt);
10570 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10571 && node == NULL
10572 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10574 gimple_stmt_iterator si;
10576 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10578 stmt_vec_info pattern_def_stmt_info
10579 = vinfo->lookup_stmt (gsi_stmt (si));
10580 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10581 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10583 /* Analyze def stmt of STMT if it's a pattern stmt. */
10584 if (dump_enabled_p ())
10585 dump_printf_loc (MSG_NOTE, vect_location,
10586 "==> examining pattern def statement: %G",
10587 pattern_def_stmt_info->stmt);
10589 opt_result res
10590 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10591 need_to_vectorize, node, node_instance,
10592 cost_vec);
10593 if (!res)
10594 return res;
10599 /* Skip stmts that do not need to be vectorized. In loops this is expected
10600 to include:
10601 - the COND_EXPR which is the loop exit condition
10602 - any LABEL_EXPRs in the loop
10603 - computations that are used only for array indexing or loop control.
10604 In basic blocks we only analyze statements that are a part of some SLP
10605 instance, therefore, all the statements are relevant.
10607 A pattern statement needs to be analyzed instead of the original statement
10608 if the original statement is not relevant. Otherwise, we analyze both
10609 statements. In basic blocks we are called from some SLP instance
10610 traversal, so don't analyze pattern stmts instead of the originals; the
10611 pattern stmts will already be part of an SLP instance. */
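/* Editor's illustration: in the loop below only the marked statement is
   relevant for vectorization; the exit test and the induction-variable
   update are loop control and are skipped as described above.  */
void
bump_all (int *a, int n)
{
  for (int i = 0; i < n; i++)   /* i < n and i++ are loop control only     */
    a[i] = a[i] + 1;            /* the relevant stmt that gets vectorized  */
}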
10613 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10614 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10615 && !STMT_VINFO_LIVE_P (stmt_info))
10617 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10618 && pattern_stmt_info
10619 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10620 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10622 /* Analyze PATTERN_STMT instead of the original stmt. */
10623 stmt_info = pattern_stmt_info;
10624 if (dump_enabled_p ())
10625 dump_printf_loc (MSG_NOTE, vect_location,
10626 "==> examining pattern statement: %G",
10627 stmt_info->stmt);
10629 else
10631 if (dump_enabled_p ())
10632 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10634 return opt_result::success ();
10637 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10638 && node == NULL
10639 && pattern_stmt_info
10640 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10641 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10643 /* Analyze PATTERN_STMT too. */
10644 if (dump_enabled_p ())
10645 dump_printf_loc (MSG_NOTE, vect_location,
10646 "==> examining pattern statement: %G",
10647 pattern_stmt_info->stmt);
10649 opt_result res
10650 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10651 node_instance, cost_vec);
10652 if (!res)
10653 return res;
10656 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10658 case vect_internal_def:
10659 break;
10661 case vect_reduction_def:
10662 case vect_nested_cycle:
10663 gcc_assert (!bb_vinfo
10664 && (relevance == vect_used_in_outer
10665 || relevance == vect_used_in_outer_by_reduction
10666 || relevance == vect_used_by_reduction
10667 || relevance == vect_unused_in_scope
10668 || relevance == vect_used_only_live));
10669 break;
10671 case vect_induction_def:
10672 gcc_assert (!bb_vinfo);
10673 break;
10675 case vect_constant_def:
10676 case vect_external_def:
10677 case vect_unknown_def_type:
10678 default:
10679 gcc_unreachable ();
10682 if (STMT_VINFO_RELEVANT_P (stmt_info))
10684 tree type = gimple_expr_type (stmt_info->stmt);
10685 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10686 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10687 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10688 || (call && gimple_call_lhs (call) == NULL_TREE));
10689 *need_to_vectorize = true;
10692 if (PURE_SLP_STMT (stmt_info) && !node)
10694 if (dump_enabled_p ())
10695 dump_printf_loc (MSG_NOTE, vect_location,
10696 "handled only by SLP analysis\n");
10697 return opt_result::success ();
10700 ok = true;
10701 if (!bb_vinfo
10702 && (STMT_VINFO_RELEVANT_P (stmt_info)
10703 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10704 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10705 -mveclibabi= takes precedence over library functions with
10706 the simd attribute. */
10707 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10708 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10709 cost_vec)
10710 || vectorizable_conversion (vinfo, stmt_info,
10711 NULL, NULL, node, cost_vec)
10712 || vectorizable_operation (vinfo, stmt_info,
10713 NULL, NULL, node, cost_vec)
10714 || vectorizable_assignment (vinfo, stmt_info,
10715 NULL, NULL, node, cost_vec)
10716 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10717 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10718 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10719 node, node_instance, cost_vec)
10720 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10721 NULL, node, cost_vec)
10722 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10723 || vectorizable_condition (vinfo, stmt_info,
10724 NULL, NULL, node, cost_vec)
10725 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10726 cost_vec)
10727 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10728 stmt_info, NULL, node));
10729 else
10731 if (bb_vinfo)
10732 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10733 || vectorizable_simd_clone_call (vinfo, stmt_info,
10734 NULL, NULL, node, cost_vec)
10735 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10736 cost_vec)
10737 || vectorizable_shift (vinfo, stmt_info,
10738 NULL, NULL, node, cost_vec)
10739 || vectorizable_operation (vinfo, stmt_info,
10740 NULL, NULL, node, cost_vec)
10741 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10742 cost_vec)
10743 || vectorizable_load (vinfo, stmt_info,
10744 NULL, NULL, node, cost_vec)
10745 || vectorizable_store (vinfo, stmt_info,
10746 NULL, NULL, node, cost_vec)
10747 || vectorizable_condition (vinfo, stmt_info,
10748 NULL, NULL, node, cost_vec)
10749 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10750 cost_vec));
10753 if (!ok)
10754 return opt_result::failure_at (stmt_info->stmt,
10755 "not vectorized:"
10756 " relevant stmt not supported: %G",
10757 stmt_info->stmt);
10759 /* Stmts that are (also) "live" (i.e. that are used outside the loop)
10760 need extra handling, except for vectorizable reductions. */
10761 if (!bb_vinfo
10762 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10763 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10764 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10765 stmt_info, NULL, node, node_instance,
10766 false, cost_vec))
10767 return opt_result::failure_at (stmt_info->stmt,
10768 "not vectorized:"
10769 " live stmt not supported: %G",
10770 stmt_info->stmt);
10772 return opt_result::success ();
10776 /* Function vect_transform_stmt.
10778 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10780 bool
10781 vect_transform_stmt (vec_info *vinfo,
10782 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10783 slp_tree slp_node, slp_instance slp_node_instance)
10785 bool is_store = false;
10786 gimple *vec_stmt = NULL;
10787 bool done;
10789 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
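/* Dispatch on the statement kind recorded during analysis; each case calls
   the corresponding vectorizable_* or vect_transform_* routine to emit the
   vector statements. */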
10791 switch (STMT_VINFO_TYPE (stmt_info))
10793 case type_demotion_vec_info_type:
10794 case type_promotion_vec_info_type:
10795 case type_conversion_vec_info_type:
10796 done = vectorizable_conversion (vinfo, stmt_info,
10797 gsi, &vec_stmt, slp_node, NULL);
10798 gcc_assert (done);
10799 break;
10801 case induc_vec_info_type:
10802 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
10803 stmt_info, &vec_stmt, slp_node,
10804 NULL);
10805 gcc_assert (done);
10806 break;
10808 case shift_vec_info_type:
10809 done = vectorizable_shift (vinfo, stmt_info,
10810 gsi, &vec_stmt, slp_node, NULL);
10811 gcc_assert (done);
10812 break;
10814 case op_vec_info_type:
10815 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10816 NULL);
10817 gcc_assert (done);
10818 break;
10820 case assignment_vec_info_type:
10821 done = vectorizable_assignment (vinfo, stmt_info,
10822 gsi, &vec_stmt, slp_node, NULL);
10823 gcc_assert (done);
10824 break;
10826 case load_vec_info_type:
10827 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10828 NULL);
10829 gcc_assert (done);
10830 break;
10832 case store_vec_info_type:
10833 done = vectorizable_store (vinfo, stmt_info,
10834 gsi, &vec_stmt, slp_node, NULL);
10835 gcc_assert (done);
10836 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10838 /* In case of interleaving, the whole chain is vectorized when the
10839 last store in the chain is reached. Store stmts before the last
10840 one are skipped, and their stmt_vec_info shouldn't be freed
10841 meanwhile. */
10842 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10843 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10844 is_store = true;
10846 else
10847 is_store = true;
10848 break;
10850 case condition_vec_info_type:
10851 done = vectorizable_condition (vinfo, stmt_info,
10852 gsi, &vec_stmt, slp_node, NULL);
10853 gcc_assert (done);
10854 break;
10856 case comparison_vec_info_type:
10857 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
10858 slp_node, NULL);
10859 gcc_assert (done);
10860 break;
10862 case call_vec_info_type:
10863 done = vectorizable_call (vinfo, stmt_info,
10864 gsi, &vec_stmt, slp_node, NULL);
10865 break;
10867 case call_simd_clone_vec_info_type:
10868 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
10869 slp_node, NULL);
10870 break;
10872 case reduc_vec_info_type:
10873 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10874 gsi, &vec_stmt, slp_node);
10875 gcc_assert (done);
10876 break;
10878 case cycle_phi_info_type:
10879 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
10880 &vec_stmt, slp_node, slp_node_instance);
10881 gcc_assert (done);
10882 break;
10884 case lc_phi_info_type:
10885 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10886 stmt_info, &vec_stmt, slp_node);
10887 gcc_assert (done);
10888 break;
10890 default:
10891 if (!STMT_VINFO_LIVE_P (stmt_info))
10893 if (dump_enabled_p ())
10894 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10895 "stmt not supported.\n");
10896 gcc_unreachable ();
10898 done = true;
10901 if (!slp_node && vec_stmt)
10902 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
10904 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
10905 return is_store;
10907 /* Handle stmts whose DEF is used outside the loop-nest that is
10908 being vectorized. */
10909 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
10910 slp_node_instance, true, NULL);
10911 gcc_assert (done);
10913 return false;
10917 /* Remove a group of stores (for SLP or interleaving), free their
10918 stmt_vec_info. */
10920 void
10921 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
10923 stmt_vec_info next_stmt_info = first_stmt_info;
10925 while (next_stmt_info)
10927 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10928 next_stmt_info = vect_orig_stmt (next_stmt_info);
10929 /* Free the attached stmt_vec_info and remove the stmt. */
10930 vinfo->remove_stmt (next_stmt_info);
10931 next_stmt_info = tmp;
10935 /* If NUNITS is nonzero, return a vector type that contains NUNITS
10936 elements of type SCALAR_TYPE, or null if the target doesn't support
10937 such a type.
10939 If NUNITS is zero, return a vector type that contains elements of
10940 type SCALAR_TYPE, choosing whichever vector size the target prefers.
10942 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
10943 for this vectorization region and want to "autodetect" the best choice.
10944 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
10945 and we want the new type to be interoperable with it. PREVAILING_MODE
10946 in this case can be a scalar integer mode or a vector mode; when it
10947 is a vector mode, the function acts like a tree-level version of
10948 related_vector_mode. */
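/* For example, a request for 4 units of a 32-bit integer SCALAR_TYPE with a
   128-bit prevailing vector mode would typically yield a V4SI-style vector
   type, provided the target supports such a mode. */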
10950 tree
10951 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
10952 tree scalar_type, poly_uint64 nunits)
10954 tree orig_scalar_type = scalar_type;
10955 scalar_mode inner_mode;
10956 machine_mode simd_mode;
10957 tree vectype;
10959 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
10960 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
10961 return NULL_TREE;
10963 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
10965 /* For vector types of elements whose mode precision doesn't
10966 match their type's precision we use an element type of mode
10967 precision. The vectorization routines will have to make sure
10968 they support the proper result truncation/extension.
10969 We also make sure to build vector types with INTEGER_TYPE
10970 component type only. */
10971 if (INTEGRAL_TYPE_P (scalar_type)
10972 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
10973 || TREE_CODE (scalar_type) != INTEGER_TYPE))
10974 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
10975 TYPE_UNSIGNED (scalar_type));
10977 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10978 When the component mode passes the above test, simply use a type
10979 corresponding to that mode. The theory is that any use that
10980 would cause problems with this will disable vectorization anyway. */
10981 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
10982 && !INTEGRAL_TYPE_P (scalar_type))
10983 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
10985 /* We can't build a vector type of elements with alignment bigger than
10986 their size. */
10987 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
10988 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
10989 TYPE_UNSIGNED (scalar_type));
10991 /* If we fell back to using the mode, fail if there was
10992 no scalar type for it. */
10993 if (scalar_type == NULL_TREE)
10994 return NULL_TREE;
10996 /* If no prevailing mode was supplied, use the mode the target prefers.
10997 Otherwise lookup a vector mode based on the prevailing mode. */
10998 if (prevailing_mode == VOIDmode)
11000 gcc_assert (known_eq (nunits, 0U));
11001 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11002 if (SCALAR_INT_MODE_P (simd_mode))
11004 /* Traditional behavior is not to take the integer mode
11005 literally, but simply to use it as a way of determining
11006 the vector size. It is up to mode_for_vector to decide
11007 what the TYPE_MODE should be.
11009 Note that nunits == 1 is allowed in order to support single
11010 element vector types. */
11011 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11012 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11013 return NULL_TREE;
11016 else if (SCALAR_INT_MODE_P (prevailing_mode)
11017 || !related_vector_mode (prevailing_mode,
11018 inner_mode, nunits).exists (&simd_mode))
11020 /* Fall back to using mode_for_vector, mostly in the hope of being
11021 able to use an integer mode. */
11022 if (known_eq (nunits, 0U)
11023 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11024 return NULL_TREE;
11026 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11027 return NULL_TREE;
11030 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11032 /* In cases where the mode was chosen by mode_for_vector, check that
11033 the target actually supports the chosen mode, or that it at least
11034 allows the vector mode to be replaced by a like-sized integer. */
11035 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11036 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11037 return NULL_TREE;
11039 /* Re-attach the address-space qualifier if we canonicalized the scalar
11040 type. */
11041 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11042 return build_qualified_type
11043 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11045 return vectype;
11048 /* Function get_vectype_for_scalar_type.
11050 Returns the vector type corresponding to SCALAR_TYPE as supported
11051 by the target. If GROUP_SIZE is nonzero and we're performing BB
11052 vectorization, make sure that the number of elements in the vector
11053 is no bigger than GROUP_SIZE. */
11055 tree
11056 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11057 unsigned int group_size)
11059 /* For BB vectorization, we should always have a group size once we've
11060 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11061 are tentative requests during things like early data reference
11062 analysis and pattern recognition. */
11063 if (is_a <bb_vec_info> (vinfo))
11064 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11065 else
11066 group_size = 0;
11068 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11069 scalar_type);
11070 if (vectype && vinfo->vector_mode == VOIDmode)
11071 vinfo->vector_mode = TYPE_MODE (vectype);
11073 /* Register the natural choice of vector type, before the group size
11074 has been applied. */
11075 if (vectype)
11076 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11078 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11079 try again with an explicit number of elements. */
11080 if (vectype
11081 && group_size
11082 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11084 /* Start with the biggest number of units that fits within
11085 GROUP_SIZE and halve it until we find a valid vector type.
11086 Usually either the first attempt will succeed or all will
11087 fail (in the latter case because GROUP_SIZE is too small
11088 for the target), but it's possible that a target could have
11089 a hole between supported vector types.
11091 If GROUP_SIZE is not a power of 2, this has the effect of
11092 trying the largest power of 2 that fits within the group,
11093 even though the group is not a multiple of that vector size.
11094 The BB vectorizer will then try to carve up the group into
11095 smaller pieces. */
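/* E.g. a GROUP_SIZE of 6 makes the loop below try 4 lanes first and
   then 2. */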
11096 unsigned int nunits = 1 << floor_log2 (group_size);
11099 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11100 scalar_type, nunits);
11101 nunits /= 2;
11103 while (nunits > 1 && !vectype);
11106 return vectype;
11109 /* Return the vector type corresponding to SCALAR_TYPE as supported
11110 by the target. NODE, if nonnull, is the SLP tree node that will
11111 use the returned vector type. */
11113 tree
11114 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11116 unsigned int group_size = 0;
11117 if (node)
11118 group_size = SLP_TREE_LANES (node);
11119 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11122 /* Function get_mask_type_for_scalar_type.
11124 Returns the mask type corresponding to a result of comparison
11125 of vectors of specified SCALAR_TYPE as supported by target.
11126 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11127 make sure that the number of elements in the vector is no bigger
11128 than GROUP_SIZE. */
11130 tree
11131 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11132 unsigned int group_size)
11134 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11136 if (!vectype)
11137 return NULL;
11139 return truth_type_for (vectype);
11142 /* Function get_same_sized_vectype
11144 Returns a vector type corresponding to SCALAR_TYPE of size
11145 VECTOR_TYPE if supported by the target. */
11147 tree
11148 get_same_sized_vectype (tree scalar_type, tree vector_type)
11150 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11151 return truth_type_for (vector_type);
11153 poly_uint64 nunits;
11154 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11155 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11156 return NULL_TREE;
11158 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11159 scalar_type, nunits);
11162 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11163 would not change the chosen vector modes. */
11165 bool
11166 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11168 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11169 i != vinfo->used_vector_modes.end (); ++i)
11170 if (!VECTOR_MODE_P (*i)
11171 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11172 return false;
11173 return true;
11176 /* Function vect_is_simple_use.
11178 Input:
11179 VINFO - the vect info of the loop or basic block that is being vectorized.
11180 OPERAND - operand in the loop or bb.
11181 Output:
11182 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11183 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11184 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11185 the definition could be anywhere in the function
11186 DT - the type of definition
11188 Returns whether a stmt with OPERAND can be vectorized.
11189 For loops, supportable operands are constants, loop invariants, and operands
11190 that are defined by the current iteration of the loop. Unsupportable
11191 operands are those that are defined by a previous iteration of the loop (as
11192 is the case in reduction/induction computations).
11193 For basic blocks, supportable operands are constants and bb invariants.
11194 For now, operands defined outside the basic block are not supported. */
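/* For example, in a vectorized loop an SSA_NAME set by a statement in the
   same iteration yields vect_internal_def, a value defined before the loop
   (and hence not tracked by VINFO) yields vect_external_def, and integer or
   real constants yield vect_constant_def. */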
11196 bool
11197 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11198 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11200 if (def_stmt_info_out)
11201 *def_stmt_info_out = NULL;
11202 if (def_stmt_out)
11203 *def_stmt_out = NULL;
11204 *dt = vect_unknown_def_type;
11206 if (dump_enabled_p ())
11208 dump_printf_loc (MSG_NOTE, vect_location,
11209 "vect_is_simple_use: operand ");
11210 if (TREE_CODE (operand) == SSA_NAME
11211 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11212 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11213 else
11214 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11217 if (CONSTANT_CLASS_P (operand))
11218 *dt = vect_constant_def;
11219 else if (is_gimple_min_invariant (operand))
11220 *dt = vect_external_def;
11221 else if (TREE_CODE (operand) != SSA_NAME)
11222 *dt = vect_unknown_def_type;
11223 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11224 *dt = vect_external_def;
11225 else
11227 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11228 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11229 if (!stmt_vinfo)
11230 *dt = vect_external_def;
11231 else
11233 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11234 def_stmt = stmt_vinfo->stmt;
11235 switch (gimple_code (def_stmt))
11237 case GIMPLE_PHI:
11238 case GIMPLE_ASSIGN:
11239 case GIMPLE_CALL:
11240 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11241 break;
11242 default:
11243 *dt = vect_unknown_def_type;
11244 break;
11246 if (def_stmt_info_out)
11247 *def_stmt_info_out = stmt_vinfo;
11249 if (def_stmt_out)
11250 *def_stmt_out = def_stmt;
11253 if (dump_enabled_p ())
11255 dump_printf (MSG_NOTE, ", type of def: ");
11256 switch (*dt)
11258 case vect_uninitialized_def:
11259 dump_printf (MSG_NOTE, "uninitialized\n");
11260 break;
11261 case vect_constant_def:
11262 dump_printf (MSG_NOTE, "constant\n");
11263 break;
11264 case vect_external_def:
11265 dump_printf (MSG_NOTE, "external\n");
11266 break;
11267 case vect_internal_def:
11268 dump_printf (MSG_NOTE, "internal\n");
11269 break;
11270 case vect_induction_def:
11271 dump_printf (MSG_NOTE, "induction\n");
11272 break;
11273 case vect_reduction_def:
11274 dump_printf (MSG_NOTE, "reduction\n");
11275 break;
11276 case vect_double_reduction_def:
11277 dump_printf (MSG_NOTE, "double reduction\n");
11278 break;
11279 case vect_nested_cycle:
11280 dump_printf (MSG_NOTE, "nested cycle\n");
11281 break;
11282 case vect_unknown_def_type:
11283 dump_printf (MSG_NOTE, "unknown\n");
11284 break;
11288 if (*dt == vect_unknown_def_type)
11290 if (dump_enabled_p ())
11291 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11292 "Unsupported pattern.\n");
11293 return false;
11296 return true;
11299 /* Function vect_is_simple_use.
11301 Same as vect_is_simple_use but also determines the vector operand
11302 type of OPERAND and stores it to *VECTYPE. If the definition of
11303 OPERAND is vect_uninitialized_def, vect_constant_def or
11304 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
11305 is responsible for computing the best-suited vector type for the
11306 scalar operand. */
11308 bool
11309 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11310 tree *vectype, stmt_vec_info *def_stmt_info_out,
11311 gimple **def_stmt_out)
11313 stmt_vec_info def_stmt_info;
11314 gimple *def_stmt;
11315 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11316 return false;
11318 if (def_stmt_out)
11319 *def_stmt_out = def_stmt;
11320 if (def_stmt_info_out)
11321 *def_stmt_info_out = def_stmt_info;
11323 /* Now get a vector type if the def is internal, otherwise supply
11324 NULL_TREE and leave it up to the caller to figure out a proper
11325 type for the use stmt. */
11326 if (*dt == vect_internal_def
11327 || *dt == vect_induction_def
11328 || *dt == vect_reduction_def
11329 || *dt == vect_double_reduction_def
11330 || *dt == vect_nested_cycle)
11332 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11333 gcc_assert (*vectype != NULL_TREE);
11334 if (dump_enabled_p ())
11335 dump_printf_loc (MSG_NOTE, vect_location,
11336 "vect_is_simple_use: vectype %T\n", *vectype);
11338 else if (*dt == vect_uninitialized_def
11339 || *dt == vect_constant_def
11340 || *dt == vect_external_def)
11341 *vectype = NULL_TREE;
11342 else
11343 gcc_unreachable ();
11345 return true;
11348 /* Function vect_is_simple_use.
11350 Same as vect_is_simple_use but determines the operand by operand
11351 position OPERAND from either STMT or SLP_NODE, filling in *OP
11352 and *SLP_DEF (when SLP_NODE is not NULL). */
11354 bool
11355 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11356 unsigned operand, tree *op, slp_tree *slp_def,
11357 enum vect_def_type *dt,
11358 tree *vectype, stmt_vec_info *def_stmt_info_out)
11360 if (slp_node)
11362 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11363 *slp_def = child;
11364 *vectype = SLP_TREE_VECTYPE (child);
11365 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11367 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11368 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11370 else
11372 if (def_stmt_info_out)
11373 *def_stmt_info_out = NULL;
11374 *op = SLP_TREE_SCALAR_OPS (child)[0];
11375 *dt = SLP_TREE_DEF_TYPE (child);
11376 return true;
11379 else
11381 *slp_def = NULL;
11382 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11384 if (gimple_assign_rhs_code (ass) == COND_EXPR
11385 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11387 if (operand < 2)
11388 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11389 else
11390 *op = gimple_op (ass, operand);
11392 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11393 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11394 else
11395 *op = gimple_op (ass, operand + 1);
11397 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11399 if (gimple_call_internal_p (call)
11400 && internal_store_fn_p (gimple_call_internal_fn (call)))
11401 operand = internal_fn_stored_value_index (gimple_call_internal_fn
11402 (call));
11403 *op = gimple_call_arg (call, operand);
11405 else
11406 gcc_unreachable ();
11407 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11411 /* If OP is not NULL and is external or constant, update its vector
11412 type with VECTYPE. Returns true if successful or false if not,
11413 for example when conflicting vector types are present. */
11415 bool
11416 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11418 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11419 return true;
11420 if (SLP_TREE_VECTYPE (op))
11421 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11422 SLP_TREE_VECTYPE (op) = vectype;
11423 return true;
11426 /* Function supportable_widening_operation
11428 Check whether an operation represented by the code CODE is a
11429 widening operation that is supported by the target platform in
11430 vector form (i.e., when operating on arguments of type VECTYPE_IN
11431 producing a result of type VECTYPE_OUT).
11433 Widening operations we currently support are NOP (CONVERT), FLOAT,
11434 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11435 are supported by the target platform either directly (via vector
11436 tree-codes), or via target builtins.
11438 Output:
11439 - CODE1 and CODE2 are codes of vector operations to be used when
11440 vectorizing the operation, if available.
11441 - MULTI_STEP_CVT determines the number of required intermediate steps in
11442 case of multi-step conversion (like char->short->int - in that case
11443 MULTI_STEP_CVT will be 1).
11444 - INTERM_TYPES contains the intermediate type required to perform the
11445 widening operation (short in the above example). */
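/* For example, for a char->int conversion vectorized with adjacent-size
   unpacks, *CODE1/*CODE2 would be VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR
   (possibly swapped on big-endian targets), *MULTI_STEP_CVT would be 1 and
   *INTERM_TYPES would hold the short vector type used for the intermediate
   step. */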
11447 bool
11448 supportable_widening_operation (vec_info *vinfo,
11449 enum tree_code code, stmt_vec_info stmt_info,
11450 tree vectype_out, tree vectype_in,
11451 enum tree_code *code1, enum tree_code *code2,
11452 int *multi_step_cvt,
11453 vec<tree> *interm_types)
11455 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11456 class loop *vect_loop = NULL;
11457 machine_mode vec_mode;
11458 enum insn_code icode1, icode2;
11459 optab optab1, optab2;
11460 tree vectype = vectype_in;
11461 tree wide_vectype = vectype_out;
11462 enum tree_code c1, c2;
11463 int i;
11464 tree prev_type, intermediate_type;
11465 machine_mode intermediate_mode, prev_mode;
11466 optab optab3, optab4;
11468 *multi_step_cvt = 0;
11469 if (loop_info)
11470 vect_loop = LOOP_VINFO_LOOP (loop_info);
11472 switch (code)
11474 case WIDEN_MULT_EXPR:
11475 /* The result of a vectorized widening operation usually requires
11476 two vectors (because the widened results do not fit into one vector).
11477 The generated vector results would normally be expected to be
11478 generated in the same order as in the original scalar computation,
11479 i.e. if 8 results are generated in each vector iteration, they are
11480 to be organized as follows:
11481 vect1: [res1,res2,res3,res4],
11482 vect2: [res5,res6,res7,res8].
11484 However, in the special case that the result of the widening
11485 operation is used in a reduction computation only, the order doesn't
11486 matter (because when vectorizing a reduction we change the order of
11487 the computation). Some targets can take advantage of this and
11488 generate more efficient code. For example, targets like Altivec,
11489 that support widen_mult using a sequence of {mult_even,mult_odd}
11490 generate the following vectors:
11491 vect1: [res1,res3,res5,res7],
11492 vect2: [res2,res4,res6,res8].
11494 When vectorizing outer-loops, we execute the inner-loop sequentially
11495 (each vectorized inner-loop iteration contributes to VF outer-loop
11496 iterations in parallel). We therefore don't allow changing the
11497 order of the computation in the inner-loop during outer-loop
11498 vectorization. */
11499 /* TODO: Another case in which order doesn't *really* matter is when we
11500 widen and then contract again, e.g. (short)((int)x * y >> 8).
11501 Normally, pack_trunc performs an even/odd permute, whereas the
11502 repack from an even/odd expansion would be an interleave, which
11503 would be significantly simpler for e.g. AVX2. */
11504 /* In any case, in order to avoid duplicating the code below, recurse
11505 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11506 are properly set up for the caller. If we fail, we'll continue with
11507 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11508 if (vect_loop
11509 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11510 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11511 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11512 stmt_info, vectype_out,
11513 vectype_in, code1, code2,
11514 multi_step_cvt, interm_types))
11516 /* Elements in a vector with vect_used_by_reduction property cannot
11517 be reordered if the use chain with this property does not have the
11518 same operation. One such example is s += a * b, where elements
11519 in a and b cannot be reordered. Here we check if the vector defined
11520 by STMT is only directly used in the reduction statement. */
11521 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11522 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11523 if (use_stmt_info
11524 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11525 return true;
11527 c1 = VEC_WIDEN_MULT_LO_EXPR;
11528 c2 = VEC_WIDEN_MULT_HI_EXPR;
11529 break;
11531 case DOT_PROD_EXPR:
11532 c1 = DOT_PROD_EXPR;
11533 c2 = DOT_PROD_EXPR;
11534 break;
11536 case SAD_EXPR:
11537 c1 = SAD_EXPR;
11538 c2 = SAD_EXPR;
11539 break;
11541 case VEC_WIDEN_MULT_EVEN_EXPR:
11542 /* Support the recursion induced just above. */
11543 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11544 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11545 break;
11547 case WIDEN_LSHIFT_EXPR:
11548 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11549 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11550 break;
11552 CASE_CONVERT:
11553 c1 = VEC_UNPACK_LO_EXPR;
11554 c2 = VEC_UNPACK_HI_EXPR;
11555 break;
11557 case FLOAT_EXPR:
11558 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11559 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11560 break;
11562 case FIX_TRUNC_EXPR:
11563 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11564 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11565 break;
11567 default:
11568 gcc_unreachable ();
11571 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11572 std::swap (c1, c2);
11574 if (code == FIX_TRUNC_EXPR)
11577 /* The signedness is determined from the output operand. */
11577 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11578 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11580 else if (CONVERT_EXPR_CODE_P (code)
11581 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11582 && VECTOR_BOOLEAN_TYPE_P (vectype)
11583 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11584 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11586 /* If the input and result modes are the same, a different optab
11587 is needed where we pass in the number of units in vectype. */
11588 optab1 = vec_unpacks_sbool_lo_optab;
11589 optab2 = vec_unpacks_sbool_hi_optab;
11591 else
11593 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11594 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11597 if (!optab1 || !optab2)
11598 return false;
11600 vec_mode = TYPE_MODE (vectype);
11601 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11602 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11603 return false;
11605 *code1 = c1;
11606 *code2 = c2;
11608 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11609 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11611 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11612 return true;
11613 /* For scalar masks we may have different boolean
11614 vector types having the same QImode. Thus we
11615 add an additional check on the number of elements. */
11616 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11617 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11618 return true;
11621 /* Check if it's a multi-step conversion that can be done using intermediate
11622 types. */
11624 prev_type = vectype;
11625 prev_mode = vec_mode;
11627 if (!CONVERT_EXPR_CODE_P (code))
11628 return false;
11630 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11631 intermediate steps in the promotion sequence. We try up to
11632 MAX_INTERM_CVT_STEPS steps to reach WIDE_VECTYPE, and fail if we
11633 do not. */
11634 interm_types->create (MAX_INTERM_CVT_STEPS);
11635 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11637 intermediate_mode = insn_data[icode1].operand[0].mode;
11638 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11639 intermediate_type
11640 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11641 else
11642 intermediate_type
11643 = lang_hooks.types.type_for_mode (intermediate_mode,
11644 TYPE_UNSIGNED (prev_type));
11646 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11647 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11648 && intermediate_mode == prev_mode
11649 && SCALAR_INT_MODE_P (prev_mode))
11651 /* If the input and result modes are the same, a different optab
11652 is needed where we pass in the number of units in vectype. */
11653 optab3 = vec_unpacks_sbool_lo_optab;
11654 optab4 = vec_unpacks_sbool_hi_optab;
11656 else
11658 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11659 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11662 if (!optab3 || !optab4
11663 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11664 || insn_data[icode1].operand[0].mode != intermediate_mode
11665 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11666 || insn_data[icode2].operand[0].mode != intermediate_mode
11667 || ((icode1 = optab_handler (optab3, intermediate_mode))
11668 == CODE_FOR_nothing)
11669 || ((icode2 = optab_handler (optab4, intermediate_mode))
11670 == CODE_FOR_nothing))
11671 break;
11673 interm_types->quick_push (intermediate_type);
11674 (*multi_step_cvt)++;
11676 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11677 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11679 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11680 return true;
11681 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11682 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11683 return true;
11686 prev_type = intermediate_type;
11687 prev_mode = intermediate_mode;
11690 interm_types->release ();
11691 return false;
11695 /* Function supportable_narrowing_operation
11697 Check whether an operation represented by the code CODE is a
11698 narrowing operation that is supported by the target platform in
11699 vector form (i.e., when operating on arguments of type VECTYPE_IN
11700 and producing a result of type VECTYPE_OUT).
11702 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11703 and FLOAT. This function checks if these operations are supported by
11704 the target platform directly via vector tree-codes.
11706 Output:
11707 - CODE1 is the code of a vector operation to be used when
11708 vectorizing the operation, if available.
11709 - MULTI_STEP_CVT determines the number of required intermediate steps in
11710 case of multi-step conversion (like int->short->char - in that case
11711 MULTI_STEP_CVT will be 1).
11712 - INTERM_TYPES contains the intermediate type required to perform the
11713 narrowing operation (short in the above example). */
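/* For example, for an int->char conversion done with pack-trunc operations,
   *CODE1 would be VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT would be 1 and
   *INTERM_TYPES would hold the intermediate short vector type. */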
11715 bool
11716 supportable_narrowing_operation (enum tree_code code,
11717 tree vectype_out, tree vectype_in,
11718 enum tree_code *code1, int *multi_step_cvt,
11719 vec<tree> *interm_types)
11721 machine_mode vec_mode;
11722 enum insn_code icode1;
11723 optab optab1, interm_optab;
11724 tree vectype = vectype_in;
11725 tree narrow_vectype = vectype_out;
11726 enum tree_code c1;
11727 tree intermediate_type, prev_type;
11728 machine_mode intermediate_mode, prev_mode;
11729 int i;
11730 bool uns;
11732 *multi_step_cvt = 0;
11733 switch (code)
11735 CASE_CONVERT:
11736 c1 = VEC_PACK_TRUNC_EXPR;
11737 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11738 && VECTOR_BOOLEAN_TYPE_P (vectype)
11739 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11740 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11741 optab1 = vec_pack_sbool_trunc_optab;
11742 else
11743 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11744 break;
11746 case FIX_TRUNC_EXPR:
11747 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11749 /* The signedness is determined from the output operand. */
11749 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11750 break;
11752 case FLOAT_EXPR:
11753 c1 = VEC_PACK_FLOAT_EXPR;
11754 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11755 break;
11757 default:
11758 gcc_unreachable ();
11761 if (!optab1)
11762 return false;
11764 vec_mode = TYPE_MODE (vectype);
11765 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11766 return false;
11768 *code1 = c1;
11770 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11772 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11773 return true;
11774 /* For scalar masks we may have different boolean
11775 vector types having the same QImode. Thus we
11776 add an additional check on the number of elements. */
11777 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11778 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11779 return true;
11782 if (code == FLOAT_EXPR)
11783 return false;
11785 /* Check if it's a multi-step conversion that can be done using intermediate
11786 types. */
11787 prev_mode = vec_mode;
11788 prev_type = vectype;
11789 if (code == FIX_TRUNC_EXPR)
11790 uns = TYPE_UNSIGNED (vectype_out);
11791 else
11792 uns = TYPE_UNSIGNED (vectype);
11794 /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
11795 conversion over an unsigned one, as an unsigned FIX_TRUNC_EXPR is often
11796 more costly than a signed one. */
11797 if (code == FIX_TRUNC_EXPR && uns)
11799 enum insn_code icode2;
11801 intermediate_type
11802 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11803 interm_optab
11804 = optab_for_tree_code (c1, intermediate_type, optab_default);
11805 if (interm_optab != unknown_optab
11806 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11807 && insn_data[icode1].operand[0].mode
11808 == insn_data[icode2].operand[0].mode)
11810 uns = false;
11811 optab1 = interm_optab;
11812 icode1 = icode2;
11816 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11817 intermediate steps in the narrowing sequence. We try up to
11818 MAX_INTERM_CVT_STEPS steps to reach NARROW_VECTYPE, and fail if we do not. */
11819 interm_types->create (MAX_INTERM_CVT_STEPS);
11820 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11822 intermediate_mode = insn_data[icode1].operand[0].mode;
11823 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11824 intermediate_type
11825 = vect_double_mask_nunits (prev_type, intermediate_mode);
11826 else
11827 intermediate_type
11828 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11829 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11830 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11831 && intermediate_mode == prev_mode
11832 && SCALAR_INT_MODE_P (prev_mode))
11833 interm_optab = vec_pack_sbool_trunc_optab;
11834 else
11835 interm_optab
11836 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11837 optab_default);
11838 if (!interm_optab
11839 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11840 || insn_data[icode1].operand[0].mode != intermediate_mode
11841 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11842 == CODE_FOR_nothing))
11843 break;
11845 interm_types->quick_push (intermediate_type);
11846 (*multi_step_cvt)++;
11848 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11850 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11851 return true;
11852 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11853 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11854 return true;
11857 prev_mode = intermediate_mode;
11858 prev_type = intermediate_type;
11859 optab1 = interm_optab;
11862 interm_types->release ();
11863 return false;
11866 /* Generate and return a statement that sets vector mask MASK such that
11867 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
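/* For example, with START_INDEX 6, END_INDEX 9 and an 8-lane mask, lanes
   0..2 of MASK are set and lanes 3..7 are clear. */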
11869 gcall *
11870 vect_gen_while (tree mask, tree start_index, tree end_index)
11872 tree cmp_type = TREE_TYPE (start_index);
11873 tree mask_type = TREE_TYPE (mask);
11874 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11875 cmp_type, mask_type,
11876 OPTIMIZE_FOR_SPEED));
11877 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11878 start_index, end_index,
11879 build_zero_cst (mask_type));
11880 gimple_call_set_lhs (call, mask);
11881 return call;
11884 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11885 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11887 tree
11888 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11889 tree end_index)
11891 tree tmp = make_ssa_name (mask_type);
11892 gcall *call = vect_gen_while (tmp, start_index, end_index);
11893 gimple_seq_add_stmt (seq, call);
11894 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11897 /* Try to compute the vector types required to vectorize STMT_INFO,
11898 returning true on success and false if vectorization isn't possible.
11899 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11900 make sure that the number of elements in the vectors is no bigger
11901 than GROUP_SIZE.
11903 On success:
11905 - Set *STMT_VECTYPE_OUT to:
11906 - NULL_TREE if the statement doesn't need to be vectorized;
11907 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11909 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11910 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11911 statement does not help to determine the overall number of units. */
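/* For example, for a widening conversion from short to int, *STMT_VECTYPE_OUT
   is based on int while *NUNITS_VECTYPE_OUT is based on short and so, for
   equal-sized vectors, has twice as many lanes. */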
11913 opt_result
11914 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
11915 tree *stmt_vectype_out,
11916 tree *nunits_vectype_out,
11917 unsigned int group_size)
11919 gimple *stmt = stmt_info->stmt;
11921 /* For BB vectorization, we should always have a group size once we've
11922 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11923 are tentative requests during things like early data reference
11924 analysis and pattern recognition. */
11925 if (is_a <bb_vec_info> (vinfo))
11926 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11927 else
11928 group_size = 0;
11930 *stmt_vectype_out = NULL_TREE;
11931 *nunits_vectype_out = NULL_TREE;
11933 if (gimple_get_lhs (stmt) == NULL_TREE
11934 /* MASK_STORE has no lhs, but is ok. */
11935 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11937 if (is_a <gcall *> (stmt))
11939 /* Ignore calls with no lhs. These must be calls to
11940 #pragma omp simd functions, and the vectorization factor
11941 they really need can't be determined until
11942 vectorizable_simd_clone_call. */
11943 if (dump_enabled_p ())
11944 dump_printf_loc (MSG_NOTE, vect_location,
11945 "defer to SIMD clone analysis.\n");
11946 return opt_result::success ();
11949 return opt_result::failure_at (stmt,
11950 "not vectorized: irregular stmt.%G", stmt);
11953 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
11954 return opt_result::failure_at (stmt,
11955 "not vectorized: vector stmt in loop:%G",
11956 stmt);
11958 tree vectype;
11959 tree scalar_type = NULL_TREE;
11960 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
11962 vectype = STMT_VINFO_VECTYPE (stmt_info);
11963 if (dump_enabled_p ())
11964 dump_printf_loc (MSG_NOTE, vect_location,
11965 "precomputed vectype: %T\n", vectype);
11967 else if (vect_use_mask_type_p (stmt_info))
11969 unsigned int precision = stmt_info->mask_precision;
11970 scalar_type = build_nonstandard_integer_type (precision, 1);
11971 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
11972 if (!vectype)
11973 return opt_result::failure_at (stmt, "not vectorized: unsupported"
11974 " data-type %T\n", scalar_type);
11975 if (dump_enabled_p ())
11976 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
11978 else
11980 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
11981 scalar_type = TREE_TYPE (DR_REF (dr));
11982 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
11983 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
11984 else
11985 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
11987 if (dump_enabled_p ())
11989 if (group_size)
11990 dump_printf_loc (MSG_NOTE, vect_location,
11991 "get vectype for scalar type (group size %d):"
11992 " %T\n", group_size, scalar_type);
11993 else
11994 dump_printf_loc (MSG_NOTE, vect_location,
11995 "get vectype for scalar type: %T\n", scalar_type);
11997 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11998 if (!vectype)
11999 return opt_result::failure_at (stmt,
12000 "not vectorized:"
12001 " unsupported data-type %T\n",
12002 scalar_type);
12004 if (dump_enabled_p ())
12005 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12007 *stmt_vectype_out = vectype;
12009 /* Don't try to compute scalar types if the stmt produces a boolean
12010 vector; use the existing vector type instead. */
12011 tree nunits_vectype = vectype;
12012 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12014 /* The number of units is set according to the smallest scalar
12015 type (or the largest vector size, but we only support one
12016 vector size per vectorization). */
12017 HOST_WIDE_INT dummy;
12018 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12019 if (scalar_type != TREE_TYPE (vectype))
12021 if (dump_enabled_p ())
12022 dump_printf_loc (MSG_NOTE, vect_location,
12023 "get vectype for smallest scalar type: %T\n",
12024 scalar_type);
12025 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12026 group_size);
12027 if (!nunits_vectype)
12028 return opt_result::failure_at
12029 (stmt, "not vectorized: unsupported data-type %T\n",
12030 scalar_type);
12031 if (dump_enabled_p ())
12032 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12033 nunits_vectype);
12037 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12038 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12040 if (dump_enabled_p ())
12042 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12043 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12044 dump_printf (MSG_NOTE, "\n");
12047 *nunits_vectype_out = nunits_vectype;
12048 return opt_result::success ();
12051 /* Generate and return a statement sequence that sets the vector length LEN to:
12053 min_of_start_and_end = min (START_INDEX, END_INDEX);
12054 left_len = END_INDEX - min_of_start_and_end;
12055 rhs = min (left_len, LEN_LIMIT);
12056 LEN = rhs;
12058 Note: the cost of the code generated by this function is modeled
12059 by vect_estimate_min_profitable_iters, so changes here may need
12060 corresponding changes there. */
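/* For example, with START_INDEX 0, END_INDEX 17 and LEN_LIMIT 16, the
   generated sequence sets LEN to min (17 - min (0, 17), 16) = 16. */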
12062 gimple_seq
12063 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12065 gimple_seq stmts = NULL;
12066 tree len_type = TREE_TYPE (len);
12067 gcc_assert (TREE_TYPE (start_index) == len_type);
12069 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12070 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12071 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12072 gimple* stmt = gimple_build_assign (len, rhs);
12073 gimple_seq_add_stmt (&stmts, stmt);
12075 return stmts;