PR 87926
[official-gcc.git] / gcc / tree-vect-stmts.c
blob80f6d2b8f7d3d7bd05b799be6c919ce7a00265d0
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
101 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
102 body_cost_vec->safe_push (si);
104 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
109 /* Return a variable of type ELEM_TYPE[NELEMS]. */
111 static tree
112 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
114 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
115 "vect_array");
118 /* ARRAY is an array of vectors created by create_vector_array.
119 Return an SSA_NAME for the vector in index N. The reference
120 is part of the vectorization of STMT_INFO and the vector is associated
121 with scalar destination SCALAR_DEST. */
123 static tree
124 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
125 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
127 tree vect_type, vect, vect_name, array_ref;
128 gimple *new_stmt;
130 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
131 vect_type = TREE_TYPE (TREE_TYPE (array));
132 vect = vect_create_destination_var (scalar_dest, vect_type);
133 array_ref = build4 (ARRAY_REF, vect_type, array,
134 build_int_cst (size_type_node, n),
135 NULL_TREE, NULL_TREE);
137 new_stmt = gimple_build_assign (vect, array_ref);
138 vect_name = make_ssa_name (vect, new_stmt);
139 gimple_assign_set_lhs (new_stmt, vect_name);
140 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
142 return vect_name;
145 /* ARRAY is an array of vectors created by create_vector_array.
146 Emit code to store SSA_NAME VECT in index N of the array.
147 The store is part of the vectorization of STMT_INFO. */
149 static void
150 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
151 tree vect, tree array, unsigned HOST_WIDE_INT n)
153 tree array_ref;
154 gimple *new_stmt;
156 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
157 build_int_cst (size_type_node, n),
158 NULL_TREE, NULL_TREE);
160 new_stmt = gimple_build_assign (array_ref, vect);
161 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
164 /* PTR is a pointer to an array of type TYPE. Return a representation
165 of *PTR. The memory reference replaces those in FIRST_DR
166 (and its group). */
168 static tree
169 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
171 tree mem_ref;
173 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
174 /* Arrays have the same alignment as their type. */
175 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
176 return mem_ref;
179 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
180 Emit the clobber before *GSI. */
182 static void
183 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
184 tree var)
186 tree clobber = build_clobber (TREE_TYPE (var));
187 gimple *new_stmt = gimple_build_assign (var, clobber);
188 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193 /* Function vect_mark_relevant.
195 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
197 static void
198 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
199 enum vect_relevant relevant, bool live_p)
201 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
202 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
204 if (dump_enabled_p ())
205 dump_printf_loc (MSG_NOTE, vect_location,
206 "mark relevant %d, live %d: %G", relevant, live_p,
207 stmt_info->stmt);
209 /* If this stmt is an original stmt in a pattern, we might need to mark its
210 related pattern stmt instead of the original stmt. However, such stmts
211 may have their own uses that are not in any pattern, in such cases the
212 stmt itself should be marked. */
213 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
215 /* This is the last stmt in a sequence that was detected as a
216 pattern that can potentially be vectorized. Don't mark the stmt
217 as relevant/live because it's not going to be vectorized.
218 Instead mark the pattern-stmt that replaces it. */
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE, vect_location,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_vec_info old_stmt_info = stmt_info;
225 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
226 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
227 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
228 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
231 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233 STMT_VINFO_RELEVANT (stmt_info) = relevant;
235 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE, vect_location,
240 "already marked relevant/live.\n");
241 return;
244 worklist->safe_push (stmt_info);
248 /* Function is_simple_and_all_uses_invariant
250 Return true if STMT_INFO is simple and all uses of it are invariant. */
252 bool
253 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
254 loop_vec_info loop_vinfo)
256 tree op;
257 ssa_op_iter iter;
259 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
260 if (!stmt)
261 return false;
263 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
265 enum vect_def_type dt = vect_uninitialized_def;
267 if (!vect_is_simple_use (op, loop_vinfo, &dt))
269 if (dump_enabled_p ())
270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
271 "use not simple.\n");
272 return false;
275 if (dt != vect_external_def && dt != vect_constant_def)
276 return false;
278 return true;
281 /* Function vect_stmt_relevant_p.
283 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
284 is "relevant for vectorization".
286 A stmt is considered "relevant for vectorization" if:
287 - it has uses outside the loop.
288 - it has vdefs (it alters memory).
289 - control stmts in the loop (except for the exit condition).
291 CHECKME: what other side effects would the vectorizer allow? */
293 static bool
294 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
295 enum vect_relevant *relevant, bool *live_p)
297 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
298 ssa_op_iter op_iter;
299 imm_use_iterator imm_iter;
300 use_operand_p use_p;
301 def_operand_p def_p;
303 *relevant = vect_unused_in_scope;
304 *live_p = false;
306 /* cond stmt other than loop exit cond. */
307 if (is_ctrl_stmt (stmt_info->stmt)
308 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
309 *relevant = vect_used_in_scope;
311 /* changing memory. */
312 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
313 if (gimple_vdef (stmt_info->stmt)
314 && !gimple_clobber_p (stmt_info->stmt))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE, vect_location,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant = vect_used_in_scope;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
342 *live_p = true;
347 if (*live_p && *relevant == vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE, vect_location,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant = vect_used_only_live;
356 return (*live_p || *relevant);
360 /* Function exist_non_indexing_operands_for_use_p
362 USE is one of the uses attached to STMT_INFO. Check if USE is
363 used in STMT_INFO for anything other than indexing an array. */
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
368 tree operand;
370 /* USE corresponds to some operand in STMT. If there is no data
371 reference in STMT, then any operand that corresponds to USE
372 is not indexing an array. */
373 if (!STMT_VINFO_DATA_REF (stmt_info))
374 return true;
376 /* STMT has a data_ref. FORNOW this means that its of one of
377 the following forms:
378 -1- ARRAY_REF = var
379 -2- var = ARRAY_REF
380 (This should have been verified in analyze_data_refs).
382 'var' in the second case corresponds to a def, not a use,
383 so USE cannot correspond to any operands that are not used
384 for array indexing.
386 Therefore, all we need to check is if STMT falls into the
387 first case, and whether var corresponds to USE. */
389 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
390 if (!assign || !gimple_assign_copy_p (assign))
392 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
393 if (call && gimple_call_internal_p (call))
395 internal_fn ifn = gimple_call_internal_fn (call);
396 int mask_index = internal_fn_mask_index (ifn);
397 if (mask_index >= 0
398 && use == gimple_call_arg (call, mask_index))
399 return true;
400 int stored_value_index = internal_fn_stored_value_index (ifn);
401 if (stored_value_index >= 0
402 && use == gimple_call_arg (call, stored_value_index))
403 return true;
404 if (internal_gather_scatter_fn_p (ifn)
405 && use == gimple_call_arg (call, 1))
406 return true;
408 return false;
411 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
412 return false;
413 operand = gimple_assign_rhs1 (assign);
414 if (TREE_CODE (operand) != SSA_NAME)
415 return false;
417 if (operand == use)
418 return true;
420 return false;
425 Function process_use.
427 Inputs:
428 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
429 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430 that defined USE. This is done by calling mark_relevant and passing it
431 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
433 be performed.
435 Outputs:
436 Generally, LIVE_P and RELEVANT are used to define the liveness and
437 relevance info of the DEF_STMT of this USE:
438 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
439 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
440 Exceptions:
441 - case 1: If USE is used only for address computations (e.g. array indexing),
442 which does not need to be directly vectorized, then the liveness/relevance
443 of the respective DEF_STMT is left unchanged.
444 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
445 we skip DEF_STMT cause it had already been processed.
446 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
447 "relevant" will be modified accordingly.
449 Return true if everything is as expected. Return false otherwise. */
451 static opt_result
452 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
453 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
454 bool force)
456 stmt_vec_info dstmt_vinfo;
457 basic_block bb, def_bb;
458 enum vect_def_type dt;
460 /* case 1: we are only interested in uses that need to be vectorized. Uses
461 that are used for address computation are not considered relevant. */
462 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
463 return opt_result::success ();
465 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
466 return opt_result::failure_at (stmt_vinfo->stmt,
467 "not vectorized:"
468 " unsupported use in stmt.\n");
470 if (!dstmt_vinfo)
471 return opt_result::success ();
473 def_bb = gimple_bb (dstmt_vinfo->stmt);
475 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
476 DSTMT_VINFO must have already been processed, because this should be the
477 only way that STMT, which is a reduction-phi, was put in the worklist,
478 as there should be no other uses for DSTMT_VINFO in the loop. So we just
479 check that everything is as expected, and we are done. */
480 bb = gimple_bb (stmt_vinfo->stmt);
481 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
483 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
485 && bb->loop_father == def_bb->loop_father)
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
490 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
491 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
492 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
610 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location,
642 "init: stmt relevant? %G", stmt_info->stmt);
644 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
652 use_operand_p use_p;
653 ssa_op_iter iter;
655 stmt_vec_info stmt_vinfo = worklist.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location,
658 "worklist: examine stmt: %G", stmt_vinfo->stmt);
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
662 of STMT. */
663 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
678 case vect_reduction_def:
679 gcc_assert (relevant != vect_unused_in_scope);
680 if (relevant != vect_unused_in_scope
681 && relevant != vect_used_in_scope
682 && relevant != vect_used_by_reduction
683 && relevant != vect_used_only_live)
684 return opt_result::failure_at
685 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
686 break;
688 case vect_nested_cycle:
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_outer_by_reduction
691 && relevant != vect_used_in_outer)
692 return opt_result::failure_at
693 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
694 break;
696 case vect_double_reduction_def:
697 if (relevant != vect_unused_in_scope
698 && relevant != vect_used_by_reduction
699 && relevant != vect_used_only_live)
700 return opt_result::failure_at
701 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
702 break;
704 default:
705 break;
708 if (is_pattern_stmt_p (stmt_vinfo))
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
715 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
716 tree op = gimple_assign_rhs1 (assign);
718 i = 1;
719 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
721 opt_result res
722 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
723 loop_vinfo, relevant, &worklist, false);
724 if (!res)
725 return res;
726 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
727 loop_vinfo, relevant, &worklist, false);
728 if (!res)
729 return res;
730 i = 2;
732 for (; i < gimple_num_ops (assign); i++)
734 op = gimple_op (assign, i);
735 if (TREE_CODE (op) == SSA_NAME)
737 opt_result res
738 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
739 &worklist, false);
740 if (!res)
741 return res;
745 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
747 for (i = 0; i < gimple_call_num_args (call); i++)
749 tree arg = gimple_call_arg (call, i);
750 opt_result res
751 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
752 &worklist, false);
753 if (!res)
754 return res;
758 else
759 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
761 tree op = USE_FROM_PTR (use_p);
762 opt_result res
763 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
764 &worklist, false);
765 if (!res)
766 return res;
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
771 gather_scatter_info gs_info;
772 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
773 gcc_unreachable ();
774 opt_result res
775 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
776 &worklist, true);
777 if (!res)
778 return res;
780 } /* while worklist */
782 return opt_result::success ();
785 /* Compute the prologue cost for invariant or constant operands. */
787 static unsigned
788 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
789 unsigned opno, enum vect_def_type dt,
790 stmt_vector_for_cost *cost_vec)
792 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
793 tree op = gimple_op (stmt, opno);
794 unsigned prologue_cost = 0;
796 /* Without looking at the actual initializer a vector of
797 constants can be implemented as load from the constant pool.
798 When all elements are the same we can use a splat. */
799 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
800 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
801 unsigned num_vects_to_check;
802 unsigned HOST_WIDE_INT const_nunits;
803 unsigned nelt_limit;
804 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
805 && ! multiple_p (const_nunits, group_size))
807 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
808 nelt_limit = const_nunits;
810 else
812 /* If either the vector has variable length or the vectors
813 are composed of repeated whole groups we only need to
814 cost construction once. All vectors will be the same. */
815 num_vects_to_check = 1;
816 nelt_limit = group_size;
818 tree elt = NULL_TREE;
819 unsigned nelt = 0;
820 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
822 unsigned si = j % group_size;
823 if (nelt == 0)
824 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
825 /* ??? We're just tracking whether all operands of a single
826 vector initializer are the same, ideally we'd check if
827 we emitted the same one already. */
828 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
829 opno))
830 elt = NULL_TREE;
831 nelt++;
832 if (nelt == nelt_limit)
834 /* ??? We need to pass down stmt_info for a vector type
835 even if it points to the wrong stmt. */
836 prologue_cost += record_stmt_cost
837 (cost_vec, 1,
838 dt == vect_external_def
839 ? (elt ? scalar_to_vec : vec_construct)
840 : vector_load,
841 stmt_info, 0, vect_prologue);
842 nelt = 0;
846 return prologue_cost;
849 /* Function vect_model_simple_cost.
851 Models cost for simple operations, i.e. those that only emit ncopies of a
852 single op. Right now, this does not account for multiple insns that could
853 be generated for the single vector op. We will handle that shortly. */
855 static void
856 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
857 enum vect_def_type *dt,
858 int ndts,
859 slp_tree node,
860 stmt_vector_for_cost *cost_vec)
862 int inside_cost = 0, prologue_cost = 0;
864 gcc_assert (cost_vec != NULL);
866 /* ??? Somehow we need to fix this at the callers. */
867 if (node)
868 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
870 if (node)
872 /* Scan operands and account for prologue cost of constants/externals.
873 ??? This over-estimates cost for multiple uses and should be
874 re-engineered. */
875 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
876 tree lhs = gimple_get_lhs (stmt);
877 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
879 tree op = gimple_op (stmt, i);
880 enum vect_def_type dt;
881 if (!op || op == lhs)
882 continue;
883 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
884 && (dt == vect_constant_def || dt == vect_external_def))
885 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
886 i, dt, cost_vec);
889 else
890 /* Cost the "broadcast" of a scalar operand in to a vector operand.
891 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
892 cost model. */
893 for (int i = 0; i < ndts; i++)
894 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
895 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
896 stmt_info, 0, vect_prologue);
898 /* Adjust for two-operator SLP nodes. */
899 if (node && SLP_TREE_TWO_OPERATORS (node))
901 ncopies *= 2;
902 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
903 stmt_info, 0, vect_body);
906 /* Pass the inside-of-loop statements to the target-specific cost model. */
907 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
908 stmt_info, 0, vect_body);
910 if (dump_enabled_p ())
911 dump_printf_loc (MSG_NOTE, vect_location,
912 "vect_model_simple_cost: inside_cost = %d, "
913 "prologue_cost = %d .\n", inside_cost, prologue_cost);
917 /* Model cost for type demotion and promotion operations. PWR is normally
918 zero for single-step promotions and demotions. It will be one if
919 two-step promotion/demotion is required, and so on. Each additional
920 step doubles the number of instructions required. */
922 static void
923 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
924 enum vect_def_type *dt, int pwr,
925 stmt_vector_for_cost *cost_vec)
927 int i, tmp;
928 int inside_cost = 0, prologue_cost = 0;
930 for (i = 0; i < pwr + 1; i++)
932 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
933 (i + 1) : i;
934 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
935 vec_promote_demote, stmt_info, 0,
936 vect_body);
939 /* FORNOW: Assuming maximum 2 args per stmts. */
940 for (i = 0; i < 2; i++)
941 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
942 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
943 stmt_info, 0, vect_prologue);
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE, vect_location,
947 "vect_model_promotion_demotion_cost: inside_cost = %d, "
948 "prologue_cost = %d .\n", inside_cost, prologue_cost);
951 /* Function vect_model_store_cost
953 Models cost for stores. In the case of grouped accesses, one access
954 has the overhead of the grouped access attributed to it. */
956 static void
957 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
958 enum vect_def_type dt,
959 vect_memory_access_type memory_access_type,
960 vec_load_store_type vls_type, slp_tree slp_node,
961 stmt_vector_for_cost *cost_vec)
963 unsigned int inside_cost = 0, prologue_cost = 0;
964 stmt_vec_info first_stmt_info = stmt_info;
965 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
967 /* ??? Somehow we need to fix this at the callers. */
968 if (slp_node)
969 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
971 if (vls_type == VLS_STORE_INVARIANT)
973 if (slp_node)
974 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
975 1, dt, cost_vec);
976 else
977 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
978 stmt_info, 0, vect_prologue);
981 /* Grouped stores update all elements in the group at once,
982 so we want the DR for the first statement. */
983 if (!slp_node && grouped_access_p)
984 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
986 /* True if we should include any once-per-group costs as well as
987 the cost of the statement itself. For SLP we only get called
988 once per group anyhow. */
989 bool first_stmt_p = (first_stmt_info == stmt_info);
991 /* We assume that the cost of a single store-lanes instruction is
992 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
993 access is instead being provided by a permute-and-store operation,
994 include the cost of the permutes. */
995 if (first_stmt_p
996 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
998 /* Uses a high and low interleave or shuffle operations for each
999 needed permute. */
1000 int group_size = DR_GROUP_SIZE (first_stmt_info);
1001 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1002 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1003 stmt_info, 0, vect_body);
1005 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_NOTE, vect_location,
1007 "vect_model_store_cost: strided group_size = %d .\n",
1008 group_size);
1011 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1012 /* Costs of the stores. */
1013 if (memory_access_type == VMAT_ELEMENTWISE
1014 || memory_access_type == VMAT_GATHER_SCATTER)
1016 /* N scalar stores plus extracting the elements. */
1017 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1018 inside_cost += record_stmt_cost (cost_vec,
1019 ncopies * assumed_nunits,
1020 scalar_store, stmt_info, 0, vect_body);
1022 else
1023 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1025 if (memory_access_type == VMAT_ELEMENTWISE
1026 || memory_access_type == VMAT_STRIDED_SLP)
1028 /* N scalar stores plus extracting the elements. */
1029 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1030 inside_cost += record_stmt_cost (cost_vec,
1031 ncopies * assumed_nunits,
1032 vec_to_scalar, stmt_info, 0, vect_body);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_store_cost: inside_cost = %d, "
1038 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1042 /* Calculate cost of DR's memory access. */
1043 void
1044 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1045 unsigned int *inside_cost,
1046 stmt_vector_for_cost *body_cost_vec)
1048 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1049 int alignment_support_scheme
1050 = vect_supportable_dr_alignment (dr_info, false);
1052 switch (alignment_support_scheme)
1054 case dr_aligned:
1056 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1057 vector_store, stmt_info, 0,
1058 vect_body);
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_NOTE, vect_location,
1062 "vect_model_store_cost: aligned.\n");
1063 break;
1066 case dr_unaligned_supported:
1068 /* Here, we assign an additional cost for the unaligned store. */
1069 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1070 unaligned_store, stmt_info,
1071 DR_MISALIGNMENT (dr_info),
1072 vect_body);
1073 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_NOTE, vect_location,
1075 "vect_model_store_cost: unaligned supported by "
1076 "hardware.\n");
1077 break;
1080 case dr_unaligned_unsupported:
1082 *inside_cost = VECT_MAX_COST;
1084 if (dump_enabled_p ())
1085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1086 "vect_model_store_cost: unsupported access.\n");
1087 break;
1090 default:
1091 gcc_unreachable ();
1096 /* Function vect_model_load_cost
1098 Models cost for loads. In the case of grouped accesses, one access has
1099 the overhead of the grouped access attributed to it. Since unaligned
1100 accesses are supported for loads, we also account for the costs of the
1101 access scheme chosen. */
1103 static void
1104 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1105 vect_memory_access_type memory_access_type,
1106 slp_instance instance,
1107 slp_tree slp_node,
1108 stmt_vector_for_cost *cost_vec)
1110 unsigned int inside_cost = 0, prologue_cost = 0;
1111 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1113 gcc_assert (cost_vec);
1115 /* ??? Somehow we need to fix this at the callers. */
1116 if (slp_node)
1117 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1119 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1121 /* If the load is permuted then the alignment is determined by
1122 the first group element not by the first scalar stmt DR. */
1123 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1124 /* Record the cost for the permutation. */
1125 unsigned n_perms;
1126 unsigned assumed_nunits
1127 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1128 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1129 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1130 slp_vf, instance, true,
1131 &n_perms);
1132 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1133 first_stmt_info, 0, vect_body);
1134 /* And adjust the number of loads performed. This handles
1135 redundancies as well as loads that are later dead. */
1136 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1137 bitmap_clear (perm);
1138 for (unsigned i = 0;
1139 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1140 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1141 ncopies = 0;
1142 bool load_seen = false;
1143 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1145 if (i % assumed_nunits == 0)
1147 if (load_seen)
1148 ncopies++;
1149 load_seen = false;
1151 if (bitmap_bit_p (perm, i))
1152 load_seen = true;
1154 if (load_seen)
1155 ncopies++;
1156 gcc_assert (ncopies
1157 <= (DR_GROUP_SIZE (first_stmt_info)
1158 - DR_GROUP_GAP (first_stmt_info)
1159 + assumed_nunits - 1) / assumed_nunits);
1162 /* Grouped loads read all elements in the group at once,
1163 so we want the DR for the first statement. */
1164 stmt_vec_info first_stmt_info = stmt_info;
1165 if (!slp_node && grouped_access_p)
1166 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1168 /* True if we should include any once-per-group costs as well as
1169 the cost of the statement itself. For SLP we only get called
1170 once per group anyhow. */
1171 bool first_stmt_p = (first_stmt_info == stmt_info);
1173 /* We assume that the cost of a single load-lanes instruction is
1174 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1175 access is instead being provided by a load-and-permute operation,
1176 include the cost of the permutes. */
1177 if (first_stmt_p
1178 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1180 /* Uses an even and odd extract operations or shuffle operations
1181 for each needed permute. */
1182 int group_size = DR_GROUP_SIZE (first_stmt_info);
1183 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1184 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1185 stmt_info, 0, vect_body);
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: strided group_size = %d .\n",
1190 group_size);
1193 /* The loads themselves. */
1194 if (memory_access_type == VMAT_ELEMENTWISE
1195 || memory_access_type == VMAT_GATHER_SCATTER)
1197 /* N scalar loads plus gathering them into a vector. */
1198 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1199 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1200 inside_cost += record_stmt_cost (cost_vec,
1201 ncopies * assumed_nunits,
1202 scalar_load, stmt_info, 0, vect_body);
1204 else
1205 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1206 &inside_cost, &prologue_cost,
1207 cost_vec, cost_vec, true);
1208 if (memory_access_type == VMAT_ELEMENTWISE
1209 || memory_access_type == VMAT_STRIDED_SLP)
1210 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1211 stmt_info, 0, vect_body);
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE, vect_location,
1215 "vect_model_load_cost: inside_cost = %d, "
1216 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1220 /* Calculate cost of DR's memory access. */
1221 void
1222 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1223 bool add_realign_cost, unsigned int *inside_cost,
1224 unsigned int *prologue_cost,
1225 stmt_vector_for_cost *prologue_cost_vec,
1226 stmt_vector_for_cost *body_cost_vec,
1227 bool record_prologue_costs)
1229 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1230 int alignment_support_scheme
1231 = vect_supportable_dr_alignment (dr_info, false);
1233 switch (alignment_support_scheme)
1235 case dr_aligned:
1237 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1238 stmt_info, 0, vect_body);
1240 if (dump_enabled_p ())
1241 dump_printf_loc (MSG_NOTE, vect_location,
1242 "vect_model_load_cost: aligned.\n");
1244 break;
1246 case dr_unaligned_supported:
1248 /* Here, we assign an additional cost for the unaligned load. */
1249 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1250 unaligned_load, stmt_info,
1251 DR_MISALIGNMENT (dr_info),
1252 vect_body);
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE, vect_location,
1256 "vect_model_load_cost: unaligned supported by "
1257 "hardware.\n");
1259 break;
1261 case dr_explicit_realign:
1263 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1264 vector_load, stmt_info, 0, vect_body);
1265 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1266 vec_perm, stmt_info, 0, vect_body);
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1270 prologue costs. */
1271 if (targetm.vectorize.builtin_mask_for_load)
1272 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1273 stmt_info, 0, vect_body);
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE, vect_location,
1277 "vect_model_load_cost: explicit realign\n");
1279 break;
1281 case dr_explicit_realign_optimized:
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "vect_model_load_cost: unaligned software "
1286 "pipelined.\n");
1288 /* Unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1295 if (add_realign_cost && record_prologue_costs)
1297 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1298 vector_stmt, stmt_info,
1299 0, vect_prologue);
1300 if (targetm.vectorize.builtin_mask_for_load)
1301 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1302 vector_stmt, stmt_info,
1303 0, vect_prologue);
1306 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1307 stmt_info, 0, vect_body);
1308 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1309 stmt_info, 0, vect_body);
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE, vect_location,
1313 "vect_model_load_cost: explicit realign optimized"
1314 "\n");
1316 break;
1319 case dr_unaligned_unsupported:
1321 *inside_cost = VECT_MAX_COST;
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1325 "vect_model_load_cost: unsupported access.\n");
1326 break;
1329 default:
1330 gcc_unreachable ();
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
1337 static void
1338 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1339 gimple_stmt_iterator *gsi)
1341 if (gsi)
1342 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1343 else
1345 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1347 if (loop_vinfo)
1349 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1350 basic_block new_bb;
1351 edge pe;
1353 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1354 loop = loop->inner;
1356 pe = loop_preheader_edge (loop);
1357 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1358 gcc_assert (!new_bb);
1360 else
1362 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1363 basic_block bb;
1364 gimple_stmt_iterator gsi_bb_start;
1366 gcc_assert (bb_vinfo);
1367 bb = BB_VINFO_BB (bb_vinfo);
1368 gsi_bb_start = gsi_after_labels (bb);
1369 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1373 if (dump_enabled_p ())
1374 dump_printf_loc (MSG_NOTE, vect_location,
1375 "created new init_stmt: %G", new_stmt);
1378 /* Function vect_init_vector.
1380 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1381 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1382 vector type a vector with all elements equal to VAL is created first.
1383 Place the initialization at BSI if it is not NULL. Otherwise, place the
1384 initialization at the loop preheader.
1385 Return the DEF of INIT_STMT.
1386 It will be used in the vectorization of STMT_INFO. */
1388 tree
1389 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1390 gimple_stmt_iterator *gsi)
1392 gimple *init_stmt;
1393 tree new_temp;
1395 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1396 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1398 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1399 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1401 /* Scalar boolean value should be transformed into
1402 all zeros or all ones value before building a vector. */
1403 if (VECTOR_BOOLEAN_TYPE_P (type))
1405 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1406 tree false_val = build_zero_cst (TREE_TYPE (type));
1408 if (CONSTANT_CLASS_P (val))
1409 val = integer_zerop (val) ? false_val : true_val;
1410 else
1412 new_temp = make_ssa_name (TREE_TYPE (type));
1413 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1414 val, true_val, false_val);
1415 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1416 val = new_temp;
1419 else if (CONSTANT_CLASS_P (val))
1420 val = fold_convert (TREE_TYPE (type), val);
1421 else
1423 new_temp = make_ssa_name (TREE_TYPE (type));
1424 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1425 init_stmt = gimple_build_assign (new_temp,
1426 fold_build1 (VIEW_CONVERT_EXPR,
1427 TREE_TYPE (type),
1428 val));
1429 else
1430 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1431 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1432 val = new_temp;
1435 val = build_vector_from_val (type, val);
1438 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1439 init_stmt = gimple_build_assign (new_temp, val);
1440 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1441 return new_temp;
1444 /* Function vect_get_vec_def_for_operand_1.
1446 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1447 with type DT that will be used in the vectorized stmt. */
1449 tree
1450 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1451 enum vect_def_type dt)
1453 tree vec_oprnd;
1454 stmt_vec_info vec_stmt_info;
1456 switch (dt)
1458 /* operand is a constant or a loop invariant. */
1459 case vect_constant_def:
1460 case vect_external_def:
1461 /* Code should use vect_get_vec_def_for_operand. */
1462 gcc_unreachable ();
1464 /* Operand is defined by a loop header phi. In case of nested
1465 cycles we also may have uses of the backedge def. */
1466 case vect_reduction_def:
1467 case vect_double_reduction_def:
1468 case vect_nested_cycle:
1469 case vect_induction_def:
1470 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1471 || dt == vect_nested_cycle);
1472 /* Fallthru. */
1474 /* operand is defined inside the loop. */
1475 case vect_internal_def:
1477 /* Get the def from the vectorized stmt. */
1478 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1479 /* Get vectorized pattern statement. */
1480 if (!vec_stmt_info
1481 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1482 && !STMT_VINFO_RELEVANT (def_stmt_info))
1483 vec_stmt_info = (STMT_VINFO_VEC_STMT
1484 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1485 gcc_assert (vec_stmt_info);
1486 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1487 vec_oprnd = PHI_RESULT (phi);
1488 else
1489 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1490 return vec_oprnd;
1493 default:
1494 gcc_unreachable ();
1499 /* Function vect_get_vec_def_for_operand.
1501 OP is an operand in STMT_VINFO. This function returns a (vector) def
1502 that will be used in the vectorized stmt for STMT_VINFO.
1504 In the case that OP is an SSA_NAME which is defined in the loop, then
1505 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1507 In case OP is an invariant or constant, a new stmt that creates a vector def
1508 needs to be introduced. VECTYPE may be used to specify a required type for
1509 vector invariant. */
1511 tree
1512 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1514 gimple *def_stmt;
1515 enum vect_def_type dt;
1516 bool is_simple_use;
1517 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location,
1521 "vect_get_vec_def_for_operand: %T\n", op);
1523 stmt_vec_info def_stmt_info;
1524 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1525 &def_stmt_info, &def_stmt);
1526 gcc_assert (is_simple_use);
1527 if (def_stmt && dump_enabled_p ())
1528 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1530 if (dt == vect_constant_def || dt == vect_external_def)
1532 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1533 tree vector_type;
1535 if (vectype)
1536 vector_type = vectype;
1537 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1538 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1539 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1540 else
1541 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1543 gcc_assert (vector_type);
1544 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1546 else
1547 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1551 /* Function vect_get_vec_def_for_stmt_copy
1553 Return a vector-def for an operand. This function is used when the
1554 vectorized stmt to be created (by the caller to this function) is a "copy"
1555 created in case the vectorized result cannot fit in one vector, and several
1556 copies of the vector-stmt are required. In this case the vector-def is
1557 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1558 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1560 Context:
1561 In case the vectorization factor (VF) is bigger than the number
1562 of elements that can fit in a vectype (nunits), we have to generate
1563 more than one vector stmt to vectorize the scalar stmt. This situation
1564 arises when there are multiple data-types operated upon in the loop; the
1565 smallest data-type determines the VF, and as a result, when vectorizing
1566 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1567 vector stmt (each computing a vector of 'nunits' results, and together
1568 computing 'VF' results in each iteration). This function is called when
1569 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1570 which VF=16 and nunits=4, so the number of copies required is 4):
1572 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1574 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1575 VS1.1: vx.1 = memref1 VS1.2
1576 VS1.2: vx.2 = memref2 VS1.3
1577 VS1.3: vx.3 = memref3
1579 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1580 VSnew.1: vz1 = vx.1 + ... VSnew.2
1581 VSnew.2: vz2 = vx.2 + ... VSnew.3
1582 VSnew.3: vz3 = vx.3 + ...
1584 The vectorization of S1 is explained in vectorizable_load.
1585 The vectorization of S2:
1586 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1587 the function 'vect_get_vec_def_for_operand' is called to
1588 get the relevant vector-def for each operand of S2. For operand x it
1589 returns the vector-def 'vx.0'.
1591 To create the remaining copies of the vector-stmt (VSnew.j), this
1592 function is called to get the relevant vector-def for each operand. It is
1593 obtained from the respective VS1.j stmt, which is recorded in the
1594 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1596 For example, to obtain the vector-def 'vx.1' in order to create the
1597 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1598 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1599 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1600 and return its def ('vx.1').
1601 Overall, to create the above sequence this function will be called 3 times:
1602 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1603 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1604 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1606 tree
1607 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1609 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1610 if (!def_stmt_info)
1611 /* Do nothing; can reuse same def. */
1612 return vec_oprnd;
1614 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1615 gcc_assert (def_stmt_info);
1616 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1617 vec_oprnd = PHI_RESULT (phi);
1618 else
1619 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1620 return vec_oprnd;
1624 /* Get vectorized definitions for the operands to create a copy of an original
1625 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1627 void
1628 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1629 vec<tree> *vec_oprnds0,
1630 vec<tree> *vec_oprnds1)
1632 tree vec_oprnd = vec_oprnds0->pop ();
1634 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1635 vec_oprnds0->quick_push (vec_oprnd);
1637 if (vec_oprnds1 && vec_oprnds1->length ())
1639 vec_oprnd = vec_oprnds1->pop ();
1640 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1641 vec_oprnds1->quick_push (vec_oprnd);
1646 /* Get vectorized definitions for OP0 and OP1. */
1648 void
1649 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1650 vec<tree> *vec_oprnds0,
1651 vec<tree> *vec_oprnds1,
1652 slp_tree slp_node)
1654 if (slp_node)
1656 int nops = (op1 == NULL_TREE) ? 1 : 2;
1657 auto_vec<tree> ops (nops);
1658 auto_vec<vec<tree> > vec_defs (nops);
1660 ops.quick_push (op0);
1661 if (op1)
1662 ops.quick_push (op1);
1664 vect_get_slp_defs (ops, slp_node, &vec_defs);
1666 *vec_oprnds0 = vec_defs[0];
1667 if (op1)
1668 *vec_oprnds1 = vec_defs[1];
1670 else
1672 tree vec_oprnd;
1674 vec_oprnds0->create (1);
1675 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1676 vec_oprnds0->quick_push (vec_oprnd);
1678 if (op1)
1680 vec_oprnds1->create (1);
1681 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1682 vec_oprnds1->quick_push (vec_oprnd);
1687 /* Helper function called by vect_finish_replace_stmt and
1688 vect_finish_stmt_generation. Set the location of the new
1689 statement and create and return a stmt_vec_info for it. */
1691 static stmt_vec_info
1692 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1694 vec_info *vinfo = stmt_info->vinfo;
1696 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1698 if (dump_enabled_p ())
1699 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1701 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1703 /* While EH edges will generally prevent vectorization, stmt might
1704 e.g. be in a must-not-throw region. Ensure newly created stmts
1705 that could throw are part of the same region. */
1706 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1707 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1708 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1710 return vec_stmt_info;
1713 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1714 which sets the same scalar result as STMT_INFO did. Create and return a
1715 stmt_vec_info for VEC_STMT. */
1717 stmt_vec_info
1718 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1720 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1722 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1723 gsi_replace (&gsi, vec_stmt, true);
1725 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1728 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1729 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1731 stmt_vec_info
1732 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1733 gimple_stmt_iterator *gsi)
1735 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1737 if (!gsi_end_p (*gsi)
1738 && gimple_has_mem_ops (vec_stmt))
1740 gimple *at_stmt = gsi_stmt (*gsi);
1741 tree vuse = gimple_vuse (at_stmt);
1742 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1744 tree vdef = gimple_vdef (at_stmt);
1745 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1746 /* If we have an SSA vuse and insert a store, update virtual
1747 SSA form to avoid triggering the renamer. Do so only
1748 if we can easily see all uses - which is what almost always
1749 happens with the way vectorized stmts are inserted. */
1750 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1751 && ((is_gimple_assign (vec_stmt)
1752 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1753 || (is_gimple_call (vec_stmt)
1754 && !(gimple_call_flags (vec_stmt)
1755 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1757 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1758 gimple_set_vdef (vec_stmt, new_vdef);
1759 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1763 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1764 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1767 /* We want to vectorize a call to combined function CFN with function
1768 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1769 as the types of all inputs. Check whether this is possible using
1770 an internal function, returning its code if so or IFN_LAST if not. */
1772 static internal_fn
1773 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1774 tree vectype_out, tree vectype_in)
1776 internal_fn ifn;
1777 if (internal_fn_p (cfn))
1778 ifn = as_internal_fn (cfn);
1779 else
1780 ifn = associated_internal_fn (fndecl);
1781 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1783 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1784 if (info.vectorizable)
1786 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1787 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1788 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1789 OPTIMIZE_FOR_SPEED))
1790 return ifn;
1793 return IFN_LAST;
1797 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1798 gimple_stmt_iterator *);
1800 /* Check whether a load or store statement in the loop described by
1801 LOOP_VINFO is possible in a fully-masked loop. This is testing
1802 whether the vectorizer pass has the appropriate support, as well as
1803 whether the target does.
1805 VLS_TYPE says whether the statement is a load or store and VECTYPE
1806 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1807 says how the load or store is going to be implemented and GROUP_SIZE
1808 is the number of load or store statements in the containing group.
1809 If the access is a gather load or scatter store, GS_INFO describes
1810 its arguments.
1812 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1813 supported, otherwise record the required mask types. */
1815 static void
1816 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1817 vec_load_store_type vls_type, int group_size,
1818 vect_memory_access_type memory_access_type,
1819 gather_scatter_info *gs_info)
1821 /* Invariant loads need no special support. */
1822 if (memory_access_type == VMAT_INVARIANT)
1823 return;
1825 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1826 machine_mode vecmode = TYPE_MODE (vectype);
1827 bool is_load = (vls_type == VLS_LOAD);
1828 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1830 if (is_load
1831 ? !vect_load_lanes_supported (vectype, group_size, true)
1832 : !vect_store_lanes_supported (vectype, group_size, true))
1834 if (dump_enabled_p ())
1835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1836 "can't use a fully-masked loop because the"
1837 " target doesn't have an appropriate masked"
1838 " load/store-lanes instruction.\n");
1839 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1840 return;
1842 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1843 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1844 return;
1847 if (memory_access_type == VMAT_GATHER_SCATTER)
1849 internal_fn ifn = (is_load
1850 ? IFN_MASK_GATHER_LOAD
1851 : IFN_MASK_SCATTER_STORE);
1852 tree offset_type = TREE_TYPE (gs_info->offset);
1853 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1854 gs_info->memory_type,
1855 TYPE_SIGN (offset_type),
1856 gs_info->scale))
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1860 "can't use a fully-masked loop because the"
1861 " target doesn't have an appropriate masked"
1862 " gather load or scatter store instruction.\n");
1863 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1864 return;
1866 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1867 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1868 return;
1871 if (memory_access_type != VMAT_CONTIGUOUS
1872 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1874 /* Element X of the data must come from iteration i * VF + X of the
1875 scalar loop. We need more work to support other mappings. */
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1878 "can't use a fully-masked loop because an access"
1879 " isn't contiguous.\n");
1880 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1881 return;
1884 machine_mode mask_mode;
1885 if (!(targetm.vectorize.get_mask_mode
1886 (GET_MODE_NUNITS (vecmode),
1887 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1888 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1890 if (dump_enabled_p ())
1891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1892 "can't use a fully-masked loop because the target"
1893 " doesn't have the appropriate masked load or"
1894 " store.\n");
1895 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1896 return;
1898 /* We might load more scalars than we need for permuting SLP loads.
1899 We checked in get_group_load_store_type that the extra elements
1900 don't leak into a new vector. */
1901 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1902 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1903 unsigned int nvectors;
1904 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1905 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1906 else
1907 gcc_unreachable ();
1910 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1911 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1912 that needs to be applied to all loads and stores in a vectorized loop.
1913 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1915 MASK_TYPE is the type of both masks. If new statements are needed,
1916 insert them before GSI. */
1918 static tree
1919 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1920 gimple_stmt_iterator *gsi)
1922 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1923 if (!loop_mask)
1924 return vec_mask;
1926 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1927 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1928 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1929 vec_mask, loop_mask);
1930 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1931 return and_res;
1934 /* Determine whether we can use a gather load or scatter store to vectorize
1935 strided load or store STMT_INFO by truncating the current offset to a
1936 smaller width. We need to be able to construct an offset vector:
1938 { 0, X, X*2, X*3, ... }
1940 without loss of precision, where X is STMT_INFO's DR_STEP.
1942 Return true if this is possible, describing the gather load or scatter
1943 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1945 static bool
1946 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1947 loop_vec_info loop_vinfo, bool masked_p,
1948 gather_scatter_info *gs_info)
1950 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1951 data_reference *dr = dr_info->dr;
1952 tree step = DR_STEP (dr);
1953 if (TREE_CODE (step) != INTEGER_CST)
1955 /* ??? Perhaps we could use range information here? */
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE, vect_location,
1958 "cannot truncate variable step.\n");
1959 return false;
1962 /* Get the number of bits in an element. */
1963 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1964 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1965 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1967 /* Set COUNT to the upper limit on the number of elements - 1.
1968 Start with the maximum vectorization factor. */
1969 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1971 /* Try lowering COUNT to the number of scalar latch iterations. */
1972 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1973 widest_int max_iters;
1974 if (max_loop_iterations (loop, &max_iters)
1975 && max_iters < count)
1976 count = max_iters.to_shwi ();
1978 /* Try scales of 1 and the element size. */
1979 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1980 wi::overflow_type overflow = wi::OVF_NONE;
1981 for (int i = 0; i < 2; ++i)
1983 int scale = scales[i];
1984 widest_int factor;
1985 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1986 continue;
1988 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
1989 in OFFSET_BITS bits. */
1990 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1991 if (overflow)
1992 continue;
1993 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1994 if (wi::min_precision (range, sign) > element_bits)
1996 overflow = wi::OVF_UNKNOWN;
1997 continue;
2000 /* See whether the target supports the operation. */
2001 tree memory_type = TREE_TYPE (DR_REF (dr));
2002 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2003 memory_type, element_bits, sign, scale,
2004 &gs_info->ifn, &gs_info->element_type))
2005 continue;
2007 tree offset_type = build_nonstandard_integer_type (element_bits,
2008 sign == UNSIGNED);
2010 gs_info->decl = NULL_TREE;
2011 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2012 but we don't need to store that here. */
2013 gs_info->base = NULL_TREE;
2014 gs_info->offset = fold_convert (offset_type, step);
2015 gs_info->offset_dt = vect_constant_def;
2016 gs_info->offset_vectype = NULL_TREE;
2017 gs_info->scale = scale;
2018 gs_info->memory_type = memory_type;
2019 return true;
2022 if (overflow && dump_enabled_p ())
2023 dump_printf_loc (MSG_NOTE, vect_location,
2024 "truncating gather/scatter offset to %d bits"
2025 " might change its value.\n", element_bits);
2027 return false;
2030 /* Return true if we can use gather/scatter internal functions to
2031 vectorize STMT_INFO, which is a grouped or strided load or store.
2032 MASKED_P is true if load or store is conditional. When returning
2033 true, fill in GS_INFO with the information required to perform the
2034 operation. */
2036 static bool
2037 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2038 loop_vec_info loop_vinfo, bool masked_p,
2039 gather_scatter_info *gs_info)
2041 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2042 || gs_info->decl)
2043 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2044 masked_p, gs_info);
2046 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2047 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2048 tree offset_type = TREE_TYPE (gs_info->offset);
2049 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2051 /* Enforced by vect_check_gather_scatter. */
2052 gcc_assert (element_bits >= offset_bits);
2054 /* If the elements are wider than the offset, convert the offset to the
2055 same width, without changing its sign. */
2056 if (element_bits > offset_bits)
2058 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2059 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2060 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2063 if (dump_enabled_p ())
2064 dump_printf_loc (MSG_NOTE, vect_location,
2065 "using gather/scatter for strided/grouped access,"
2066 " scale = %d\n", gs_info->scale);
2068 return true;
2071 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2072 elements with a known constant step. Return -1 if that step
2073 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2075 static int
2076 compare_step_with_zero (stmt_vec_info stmt_info)
2078 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2079 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2080 size_zero_node);
2083 /* If the target supports a permute mask that reverses the elements in
2084 a vector of type VECTYPE, return that mask, otherwise return null. */
2086 static tree
2087 perm_mask_for_reverse (tree vectype)
2089 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2091 /* The encoding has a single stepped pattern. */
2092 vec_perm_builder sel (nunits, 1, 3);
2093 for (int i = 0; i < 3; ++i)
2094 sel.quick_push (nunits - 1 - i);
2096 vec_perm_indices indices (sel, 1, nunits);
2097 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2098 return NULL_TREE;
2099 return vect_gen_perm_mask_checked (vectype, indices);
2102 /* STMT_INFO is either a masked or unconditional store. Return the value
2103 being stored. */
2105 tree
2106 vect_get_store_rhs (stmt_vec_info stmt_info)
2108 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2110 gcc_assert (gimple_assign_single_p (assign));
2111 return gimple_assign_rhs1 (assign);
2113 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2115 internal_fn ifn = gimple_call_internal_fn (call);
2116 int index = internal_fn_stored_value_index (ifn);
2117 gcc_assert (index >= 0);
2118 return gimple_call_arg (call, index);
2120 gcc_unreachable ();
2123 /* A subroutine of get_load_store_type, with a subset of the same
2124 arguments. Handle the case where STMT_INFO is part of a grouped load
2125 or store.
2127 For stores, the statements in the group are all consecutive
2128 and there is no gap at the end. For loads, the statements in the
2129 group might not be consecutive; there can be gaps between statements
2130 as well as at the end. */
2132 static bool
2133 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2134 bool masked_p, vec_load_store_type vls_type,
2135 vect_memory_access_type *memory_access_type,
2136 gather_scatter_info *gs_info)
2138 vec_info *vinfo = stmt_info->vinfo;
2139 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2140 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2141 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2142 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2143 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2144 bool single_element_p = (stmt_info == first_stmt_info
2145 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2146 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2147 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2149 /* True if the vectorized statements would access beyond the last
2150 statement in the group. */
2151 bool overrun_p = false;
2153 /* True if we can cope with such overrun by peeling for gaps, so that
2154 there is at least one final scalar iteration after the vector loop. */
2155 bool can_overrun_p = (!masked_p
2156 && vls_type == VLS_LOAD
2157 && loop_vinfo
2158 && !loop->inner);
2160 /* There can only be a gap at the end of the group if the stride is
2161 known at compile time. */
2162 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2164 /* Stores can't yet have gaps. */
2165 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2167 if (slp)
2169 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2171 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2172 separated by the stride, until we have a complete vector.
2173 Fall back to scalar accesses if that isn't possible. */
2174 if (multiple_p (nunits, group_size))
2175 *memory_access_type = VMAT_STRIDED_SLP;
2176 else
2177 *memory_access_type = VMAT_ELEMENTWISE;
2179 else
2181 overrun_p = loop_vinfo && gap != 0;
2182 if (overrun_p && vls_type != VLS_LOAD)
2184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2185 "Grouped store with gaps requires"
2186 " non-consecutive accesses\n");
2187 return false;
2189 /* An overrun is fine if the trailing elements are smaller
2190 than the alignment boundary B. Every vector access will
2191 be a multiple of B and so we are guaranteed to access a
2192 non-gap element in the same B-sized block. */
2193 if (overrun_p
2194 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2195 / vect_get_scalar_dr_size (first_dr_info)))
2196 overrun_p = false;
2197 if (overrun_p && !can_overrun_p)
2199 if (dump_enabled_p ())
2200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2201 "Peeling for outer loop is not supported\n");
2202 return false;
2204 *memory_access_type = VMAT_CONTIGUOUS;
2207 else
2209 /* We can always handle this case using elementwise accesses,
2210 but see if something more efficient is available. */
2211 *memory_access_type = VMAT_ELEMENTWISE;
2213 /* If there is a gap at the end of the group then these optimizations
2214 would access excess elements in the last iteration. */
2215 bool would_overrun_p = (gap != 0);
2216 /* An overrun is fine if the trailing elements are smaller than the
2217 alignment boundary B. Every vector access will be a multiple of B
2218 and so we are guaranteed to access a non-gap element in the
2219 same B-sized block. */
2220 if (would_overrun_p
2221 && !masked_p
2222 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2223 / vect_get_scalar_dr_size (first_dr_info)))
2224 would_overrun_p = false;
2226 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2227 && (can_overrun_p || !would_overrun_p)
2228 && compare_step_with_zero (stmt_info) > 0)
2230 /* First cope with the degenerate case of a single-element
2231 vector. */
2232 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2233 *memory_access_type = VMAT_CONTIGUOUS;
2235 /* Otherwise try using LOAD/STORE_LANES. */
2236 if (*memory_access_type == VMAT_ELEMENTWISE
2237 && (vls_type == VLS_LOAD
2238 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2239 : vect_store_lanes_supported (vectype, group_size,
2240 masked_p)))
2242 *memory_access_type = VMAT_LOAD_STORE_LANES;
2243 overrun_p = would_overrun_p;
2246 /* If that fails, try using permuting loads. */
2247 if (*memory_access_type == VMAT_ELEMENTWISE
2248 && (vls_type == VLS_LOAD
2249 ? vect_grouped_load_supported (vectype, single_element_p,
2250 group_size)
2251 : vect_grouped_store_supported (vectype, group_size)))
2253 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2254 overrun_p = would_overrun_p;
2258 /* As a last resort, trying using a gather load or scatter store.
2260 ??? Although the code can handle all group sizes correctly,
2261 it probably isn't a win to use separate strided accesses based
2262 on nearby locations. Or, even if it's a win over scalar code,
2263 it might not be a win over vectorizing at a lower VF, if that
2264 allows us to use contiguous accesses. */
2265 if (*memory_access_type == VMAT_ELEMENTWISE
2266 && single_element_p
2267 && loop_vinfo
2268 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2269 masked_p, gs_info))
2270 *memory_access_type = VMAT_GATHER_SCATTER;
2273 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2275 /* STMT is the leader of the group. Check the operands of all the
2276 stmts of the group. */
2277 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2278 while (next_stmt_info)
2280 tree op = vect_get_store_rhs (next_stmt_info);
2281 enum vect_def_type dt;
2282 if (!vect_is_simple_use (op, vinfo, &dt))
2284 if (dump_enabled_p ())
2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286 "use not simple.\n");
2287 return false;
2289 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2293 if (overrun_p)
2295 gcc_assert (can_overrun_p);
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2298 "Data access with gaps requires scalar "
2299 "epilogue loop\n");
2300 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2303 return true;
2306 /* A subroutine of get_load_store_type, with a subset of the same
2307 arguments. Handle the case where STMT_INFO is a load or store that
2308 accesses consecutive elements with a negative step. */
2310 static vect_memory_access_type
2311 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2312 vec_load_store_type vls_type,
2313 unsigned int ncopies)
2315 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2316 dr_alignment_support alignment_support_scheme;
2318 if (ncopies > 1)
2320 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2322 "multiple types with negative step.\n");
2323 return VMAT_ELEMENTWISE;
2326 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2327 if (alignment_support_scheme != dr_aligned
2328 && alignment_support_scheme != dr_unaligned_supported)
2330 if (dump_enabled_p ())
2331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2332 "negative step but alignment required.\n");
2333 return VMAT_ELEMENTWISE;
2336 if (vls_type == VLS_STORE_INVARIANT)
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_NOTE, vect_location,
2340 "negative step with invariant source;"
2341 " no permute needed.\n");
2342 return VMAT_CONTIGUOUS_DOWN;
2345 if (!perm_mask_for_reverse (vectype))
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2349 "negative step and reversing not supported.\n");
2350 return VMAT_ELEMENTWISE;
2353 return VMAT_CONTIGUOUS_REVERSE;
2356 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2357 if there is a memory access type that the vectorized form can use,
2358 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2359 or scatters, fill in GS_INFO accordingly.
2361 SLP says whether we're performing SLP rather than loop vectorization.
2362 MASKED_P is true if the statement is conditional on a vectorized mask.
2363 VECTYPE is the vector type that the vectorized statements will use.
2364 NCOPIES is the number of vector statements that will be needed. */
2366 static bool
2367 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2368 bool masked_p, vec_load_store_type vls_type,
2369 unsigned int ncopies,
2370 vect_memory_access_type *memory_access_type,
2371 gather_scatter_info *gs_info)
2373 vec_info *vinfo = stmt_info->vinfo;
2374 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2375 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2376 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2378 *memory_access_type = VMAT_GATHER_SCATTER;
2379 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2380 gcc_unreachable ();
2381 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2382 &gs_info->offset_dt,
2383 &gs_info->offset_vectype))
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "%s index use not simple.\n",
2388 vls_type == VLS_LOAD ? "gather" : "scatter");
2389 return false;
2392 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2394 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2395 vls_type, memory_access_type, gs_info))
2396 return false;
2398 else if (STMT_VINFO_STRIDED_P (stmt_info))
2400 gcc_assert (!slp);
2401 if (loop_vinfo
2402 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2403 masked_p, gs_info))
2404 *memory_access_type = VMAT_GATHER_SCATTER;
2405 else
2406 *memory_access_type = VMAT_ELEMENTWISE;
2408 else
2410 int cmp = compare_step_with_zero (stmt_info);
2411 if (cmp < 0)
2412 *memory_access_type = get_negative_load_store_type
2413 (stmt_info, vectype, vls_type, ncopies);
2414 else if (cmp == 0)
2416 gcc_assert (vls_type == VLS_LOAD);
2417 *memory_access_type = VMAT_INVARIANT;
2419 else
2420 *memory_access_type = VMAT_CONTIGUOUS;
2423 if ((*memory_access_type == VMAT_ELEMENTWISE
2424 || *memory_access_type == VMAT_STRIDED_SLP)
2425 && !nunits.is_constant ())
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "Not using elementwise accesses due to variable "
2430 "vectorization factor.\n");
2431 return false;
2434 /* FIXME: At the moment the cost model seems to underestimate the
2435 cost of using elementwise accesses. This check preserves the
2436 traditional behavior until that can be fixed. */
2437 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2438 if (!first_stmt_info)
2439 first_stmt_info = stmt_info;
2440 if (*memory_access_type == VMAT_ELEMENTWISE
2441 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2442 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2443 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2444 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2446 if (dump_enabled_p ())
2447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2448 "not falling back to elementwise accesses\n");
2449 return false;
2451 return true;
2454 /* Return true if boolean argument MASK is suitable for vectorizing
2455 conditional load or store STMT_INFO. When returning true, store the type
2456 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2457 in *MASK_VECTYPE_OUT. */
2459 static bool
2460 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2461 vect_def_type *mask_dt_out,
2462 tree *mask_vectype_out)
2464 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2466 if (dump_enabled_p ())
2467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2468 "mask argument is not a boolean.\n");
2469 return false;
2472 if (TREE_CODE (mask) != SSA_NAME)
2474 if (dump_enabled_p ())
2475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2476 "mask argument is not an SSA name.\n");
2477 return false;
2480 enum vect_def_type mask_dt;
2481 tree mask_vectype;
2482 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2484 if (dump_enabled_p ())
2485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2486 "mask use not simple.\n");
2487 return false;
2490 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2491 if (!mask_vectype)
2492 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2494 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2496 if (dump_enabled_p ())
2497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2498 "could not find an appropriate vector mask type.\n");
2499 return false;
2502 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2503 TYPE_VECTOR_SUBPARTS (vectype)))
2505 if (dump_enabled_p ())
2506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2507 "vector mask type %T",
2508 " does not match vector data type %T.\n",
2509 mask_vectype, vectype);
2511 return false;
2514 *mask_dt_out = mask_dt;
2515 *mask_vectype_out = mask_vectype;
2516 return true;
2519 /* Return true if stored value RHS is suitable for vectorizing store
2520 statement STMT_INFO. When returning true, store the type of the
2521 definition in *RHS_DT_OUT, the type of the vectorized store value in
2522 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2524 static bool
2525 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2526 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2527 vec_load_store_type *vls_type_out)
2529 /* In the case this is a store from a constant make sure
2530 native_encode_expr can handle it. */
2531 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2533 if (dump_enabled_p ())
2534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2535 "cannot encode constant as a byte sequence.\n");
2536 return false;
2539 enum vect_def_type rhs_dt;
2540 tree rhs_vectype;
2541 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 "use not simple.\n");
2546 return false;
2549 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2550 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2552 if (dump_enabled_p ())
2553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2554 "incompatible vector types.\n");
2555 return false;
2558 *rhs_dt_out = rhs_dt;
2559 *rhs_vectype_out = rhs_vectype;
2560 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2561 *vls_type_out = VLS_STORE_INVARIANT;
2562 else
2563 *vls_type_out = VLS_STORE;
2564 return true;
2567 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2568 Note that we support masks with floating-point type, in which case the
2569 floats are interpreted as a bitmask. */
2571 static tree
2572 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2574 if (TREE_CODE (masktype) == INTEGER_TYPE)
2575 return build_int_cst (masktype, -1);
2576 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2578 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2579 mask = build_vector_from_val (masktype, mask);
2580 return vect_init_vector (stmt_info, mask, masktype, NULL);
2582 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2584 REAL_VALUE_TYPE r;
2585 long tmp[6];
2586 for (int j = 0; j < 6; ++j)
2587 tmp[j] = -1;
2588 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2589 tree mask = build_real (TREE_TYPE (masktype), r);
2590 mask = build_vector_from_val (masktype, mask);
2591 return vect_init_vector (stmt_info, mask, masktype, NULL);
2593 gcc_unreachable ();
2596 /* Build an all-zero merge value of type VECTYPE while vectorizing
2597 STMT_INFO as a gather load. */
2599 static tree
2600 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2602 tree merge;
2603 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2604 merge = build_int_cst (TREE_TYPE (vectype), 0);
2605 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2607 REAL_VALUE_TYPE r;
2608 long tmp[6];
2609 for (int j = 0; j < 6; ++j)
2610 tmp[j] = 0;
2611 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2612 merge = build_real (TREE_TYPE (vectype), r);
2614 else
2615 gcc_unreachable ();
2616 merge = build_vector_from_val (vectype, merge);
2617 return vect_init_vector (stmt_info, merge, vectype, NULL);
2620 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2621 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2622 the gather load operation. If the load is conditional, MASK is the
2623 unvectorized condition and MASK_DT is its definition type, otherwise
2624 MASK is null. */
2626 static void
2627 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2628 gimple_stmt_iterator *gsi,
2629 stmt_vec_info *vec_stmt,
2630 gather_scatter_info *gs_info,
2631 tree mask)
2633 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2634 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2635 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2636 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2637 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2638 edge pe = loop_preheader_edge (loop);
2639 enum { NARROW, NONE, WIDEN } modifier;
2640 poly_uint64 gather_off_nunits
2641 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2643 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2644 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2645 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2646 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2647 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2648 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2649 tree scaletype = TREE_VALUE (arglist);
2650 gcc_checking_assert (types_compatible_p (srctype, rettype)
2651 && (!mask || types_compatible_p (srctype, masktype)));
2653 tree perm_mask = NULL_TREE;
2654 tree mask_perm_mask = NULL_TREE;
2655 if (known_eq (nunits, gather_off_nunits))
2656 modifier = NONE;
2657 else if (known_eq (nunits * 2, gather_off_nunits))
2659 modifier = WIDEN;
2661 /* Currently widening gathers and scatters are only supported for
2662 fixed-length vectors. */
2663 int count = gather_off_nunits.to_constant ();
2664 vec_perm_builder sel (count, count, 1);
2665 for (int i = 0; i < count; ++i)
2666 sel.quick_push (i | (count / 2));
2668 vec_perm_indices indices (sel, 1, count);
2669 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2670 indices);
2672 else if (known_eq (nunits, gather_off_nunits * 2))
2674 modifier = NARROW;
2676 /* Currently narrowing gathers and scatters are only supported for
2677 fixed-length vectors. */
2678 int count = nunits.to_constant ();
2679 vec_perm_builder sel (count, count, 1);
2680 sel.quick_grow (count);
2681 for (int i = 0; i < count; ++i)
2682 sel[i] = i < count / 2 ? i : i + count / 2;
2683 vec_perm_indices indices (sel, 2, count);
2684 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2686 ncopies *= 2;
2688 if (mask)
2690 for (int i = 0; i < count; ++i)
2691 sel[i] = i | (count / 2);
2692 indices.new_vector (sel, 2, count);
2693 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2696 else
2697 gcc_unreachable ();
2699 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2700 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2702 tree ptr = fold_convert (ptrtype, gs_info->base);
2703 if (!is_gimple_min_invariant (ptr))
2705 gimple_seq seq;
2706 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2707 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2708 gcc_assert (!new_bb);
2711 tree scale = build_int_cst (scaletype, gs_info->scale);
2713 tree vec_oprnd0 = NULL_TREE;
2714 tree vec_mask = NULL_TREE;
2715 tree src_op = NULL_TREE;
2716 tree mask_op = NULL_TREE;
2717 tree prev_res = NULL_TREE;
2718 stmt_vec_info prev_stmt_info = NULL;
2720 if (!mask)
2722 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2723 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2726 for (int j = 0; j < ncopies; ++j)
2728 tree op, var;
2729 if (modifier == WIDEN && (j & 1))
2730 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2731 perm_mask, stmt_info, gsi);
2732 else if (j == 0)
2733 op = vec_oprnd0
2734 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2735 else
2736 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2737 vec_oprnd0);
2739 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2741 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2742 TYPE_VECTOR_SUBPARTS (idxtype)));
2743 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2744 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2745 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2746 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2747 op = var;
2750 if (mask)
2752 if (mask_perm_mask && (j & 1))
2753 mask_op = permute_vec_elements (mask_op, mask_op,
2754 mask_perm_mask, stmt_info, gsi);
2755 else
2757 if (j == 0)
2758 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2759 else
2760 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2761 vec_mask);
2763 mask_op = vec_mask;
2764 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2766 gcc_assert
2767 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2768 TYPE_VECTOR_SUBPARTS (masktype)));
2769 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2770 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2771 gassign *new_stmt
2772 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2773 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2774 mask_op = var;
2777 src_op = mask_op;
2780 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2781 mask_op, scale);
2783 stmt_vec_info new_stmt_info;
2784 if (!useless_type_conversion_p (vectype, rettype))
2786 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2787 TYPE_VECTOR_SUBPARTS (rettype)));
2788 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2789 gimple_call_set_lhs (new_call, op);
2790 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2791 var = make_ssa_name (vec_dest);
2792 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2793 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2794 new_stmt_info
2795 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2797 else
2799 var = make_ssa_name (vec_dest, new_call);
2800 gimple_call_set_lhs (new_call, var);
2801 new_stmt_info
2802 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2805 if (modifier == NARROW)
2807 if ((j & 1) == 0)
2809 prev_res = var;
2810 continue;
2812 var = permute_vec_elements (prev_res, var, perm_mask,
2813 stmt_info, gsi);
2814 new_stmt_info = loop_vinfo->lookup_def (var);
2817 if (prev_stmt_info == NULL)
2818 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2819 else
2820 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2821 prev_stmt_info = new_stmt_info;
2825 /* Prepare the base and offset in GS_INFO for vectorization.
2826 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2827 to the vectorized offset argument for the first copy of STMT_INFO.
2828 STMT_INFO is the statement described by GS_INFO and LOOP is the
2829 containing loop. */
2831 static void
2832 vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
2833 gather_scatter_info *gs_info,
2834 tree *dataref_ptr, tree *vec_offset)
2836 gimple_seq stmts = NULL;
2837 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2838 if (stmts != NULL)
2840 basic_block new_bb;
2841 edge pe = loop_preheader_edge (loop);
2842 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2843 gcc_assert (!new_bb);
2845 tree offset_type = TREE_TYPE (gs_info->offset);
2846 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2847 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2848 offset_vectype);
2851 /* Prepare to implement a grouped or strided load or store using
2852 the gather load or scatter store operation described by GS_INFO.
2853 STMT_INFO is the load or store statement.
2855 Set *DATAREF_BUMP to the amount that should be added to the base
2856 address after each copy of the vectorized statement. Set *VEC_OFFSET
2857 to an invariant offset vector in which element I has the value
2858 I * DR_STEP / SCALE. */
2860 static void
2861 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2862 loop_vec_info loop_vinfo,
2863 gather_scatter_info *gs_info,
2864 tree *dataref_bump, tree *vec_offset)
2866 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2867 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2868 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2869 gimple_seq stmts;
2871 tree bump = size_binop (MULT_EXPR,
2872 fold_convert (sizetype, DR_STEP (dr)),
2873 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2874 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2875 if (stmts)
2876 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2878 /* The offset given in GS_INFO can have pointer type, so use the element
2879 type of the vector instead. */
2880 tree offset_type = TREE_TYPE (gs_info->offset);
2881 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2882 offset_type = TREE_TYPE (offset_vectype);
2884 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2885 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2886 ssize_int (gs_info->scale));
2887 step = fold_convert (offset_type, step);
2888 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2890 /* Create {0, X, X*2, X*3, ...}. */
2891 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2892 build_zero_cst (offset_type), step);
2893 if (stmts)
2894 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2897 /* Return the amount that should be added to a vector pointer to move
2898 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2899 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2900 vectorization. */
2902 static tree
2903 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
2904 vect_memory_access_type memory_access_type)
2906 if (memory_access_type == VMAT_INVARIANT)
2907 return size_zero_node;
2909 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2910 tree step = vect_dr_behavior (dr_info)->step;
2911 if (tree_int_cst_sgn (step) == -1)
2912 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2913 return iv_step;
2916 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2918 static bool
2919 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2920 stmt_vec_info *vec_stmt, slp_tree slp_node,
2921 tree vectype_in, stmt_vector_for_cost *cost_vec)
2923 tree op, vectype;
2924 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2925 vec_info *vinfo = stmt_info->vinfo;
2926 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2927 unsigned ncopies;
2929 op = gimple_call_arg (stmt, 0);
2930 vectype = STMT_VINFO_VECTYPE (stmt_info);
2931 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2933 /* Multiple types in SLP are handled by creating the appropriate number of
2934 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2935 case of SLP. */
2936 if (slp_node)
2937 ncopies = 1;
2938 else
2939 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2941 gcc_assert (ncopies >= 1);
2943 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2944 if (! char_vectype)
2945 return false;
2947 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2948 unsigned word_bytes;
2949 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2950 return false;
2952 /* The encoding uses one stepped pattern for each byte in the word. */
2953 vec_perm_builder elts (num_bytes, word_bytes, 3);
2954 for (unsigned i = 0; i < 3; ++i)
2955 for (unsigned j = 0; j < word_bytes; ++j)
2956 elts.quick_push ((i + 1) * word_bytes - j - 1);
2958 vec_perm_indices indices (elts, 1, num_bytes);
2959 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2960 return false;
2962 if (! vec_stmt)
2964 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2965 DUMP_VECT_SCOPE ("vectorizable_bswap");
2966 if (! slp_node)
2968 record_stmt_cost (cost_vec,
2969 1, vector_stmt, stmt_info, 0, vect_prologue);
2970 record_stmt_cost (cost_vec,
2971 ncopies, vec_perm, stmt_info, 0, vect_body);
2973 return true;
2976 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2978 /* Transform. */
2979 vec<tree> vec_oprnds = vNULL;
2980 stmt_vec_info new_stmt_info = NULL;
2981 stmt_vec_info prev_stmt_info = NULL;
2982 for (unsigned j = 0; j < ncopies; j++)
2984 /* Handle uses. */
2985 if (j == 0)
2986 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
2987 else
2988 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
2990 /* Arguments are ready. create the new vector stmt. */
2991 unsigned i;
2992 tree vop;
2993 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2995 gimple *new_stmt;
2996 tree tem = make_ssa_name (char_vectype);
2997 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2998 char_vectype, vop));
2999 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3000 tree tem2 = make_ssa_name (char_vectype);
3001 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3002 tem, tem, bswap_vconst);
3003 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3004 tem = make_ssa_name (vectype);
3005 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3006 vectype, tem2));
3007 new_stmt_info
3008 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3009 if (slp_node)
3010 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3013 if (slp_node)
3014 continue;
3016 if (j == 0)
3017 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3018 else
3019 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3021 prev_stmt_info = new_stmt_info;
3024 vec_oprnds.release ();
3025 return true;
3028 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3029 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3030 in a single step. On success, store the binary pack code in
3031 *CONVERT_CODE. */
3033 static bool
3034 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3035 tree_code *convert_code)
3037 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3038 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3039 return false;
3041 tree_code code;
3042 int multi_step_cvt = 0;
3043 auto_vec <tree, 8> interm_types;
3044 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3045 &code, &multi_step_cvt,
3046 &interm_types)
3047 || multi_step_cvt)
3048 return false;
3050 *convert_code = code;
3051 return true;
3054 /* Function vectorizable_call.
3056 Check if STMT_INFO performs a function call that can be vectorized.
3057 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3058 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3059 Return true if STMT_INFO is vectorizable in this way. */
3061 static bool
3062 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3063 stmt_vec_info *vec_stmt, slp_tree slp_node,
3064 stmt_vector_for_cost *cost_vec)
3066 gcall *stmt;
3067 tree vec_dest;
3068 tree scalar_dest;
3069 tree op;
3070 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3071 stmt_vec_info prev_stmt_info;
3072 tree vectype_out, vectype_in;
3073 poly_uint64 nunits_in;
3074 poly_uint64 nunits_out;
3075 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3076 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3077 vec_info *vinfo = stmt_info->vinfo;
3078 tree fndecl, new_temp, rhs_type;
3079 enum vect_def_type dt[4]
3080 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3081 vect_unknown_def_type };
3082 int ndts = ARRAY_SIZE (dt);
3083 int ncopies, j;
3084 auto_vec<tree, 8> vargs;
3085 auto_vec<tree, 8> orig_vargs;
3086 enum { NARROW, NONE, WIDEN } modifier;
3087 size_t i, nargs;
3088 tree lhs;
3090 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3091 return false;
3093 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3094 && ! vec_stmt)
3095 return false;
3097 /* Is STMT_INFO a vectorizable call? */
3098 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3099 if (!stmt)
3100 return false;
3102 if (gimple_call_internal_p (stmt)
3103 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3104 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3105 /* Handled by vectorizable_load and vectorizable_store. */
3106 return false;
3108 if (gimple_call_lhs (stmt) == NULL_TREE
3109 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3110 return false;
3112 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3114 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3116 /* Process function arguments. */
3117 rhs_type = NULL_TREE;
3118 vectype_in = NULL_TREE;
3119 nargs = gimple_call_num_args (stmt);
3121 /* Bail out if the function has more than three arguments, we do not have
3122 interesting builtin functions to vectorize with more than two arguments
3123 except for fma. No arguments is also not good. */
3124 if (nargs == 0 || nargs > 4)
3125 return false;
3127 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3128 combined_fn cfn = gimple_call_combined_fn (stmt);
3129 if (cfn == CFN_GOMP_SIMD_LANE)
3131 nargs = 0;
3132 rhs_type = unsigned_type_node;
3135 int mask_opno = -1;
3136 if (internal_fn_p (cfn))
3137 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3139 for (i = 0; i < nargs; i++)
3141 tree opvectype;
3143 op = gimple_call_arg (stmt, i);
3144 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
3146 if (dump_enabled_p ())
3147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3148 "use not simple.\n");
3149 return false;
3152 /* Skip the mask argument to an internal function. This operand
3153 has been converted via a pattern if necessary. */
3154 if ((int) i == mask_opno)
3155 continue;
3157 /* We can only handle calls with arguments of the same type. */
3158 if (rhs_type
3159 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3161 if (dump_enabled_p ())
3162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3163 "argument types differ.\n");
3164 return false;
3166 if (!rhs_type)
3167 rhs_type = TREE_TYPE (op);
3169 if (!vectype_in)
3170 vectype_in = opvectype;
3171 else if (opvectype
3172 && opvectype != vectype_in)
3174 if (dump_enabled_p ())
3175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3176 "argument vector types differ.\n");
3177 return false;
3180 /* If all arguments are external or constant defs use a vector type with
3181 the same size as the output vector type. */
3182 if (!vectype_in)
3183 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3184 if (vec_stmt)
3185 gcc_assert (vectype_in);
3186 if (!vectype_in)
3188 if (dump_enabled_p ())
3189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3190 "no vectype for scalar type %T\n", rhs_type);
3192 return false;
3195 /* FORNOW */
3196 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3197 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3198 if (known_eq (nunits_in * 2, nunits_out))
3199 modifier = NARROW;
3200 else if (known_eq (nunits_out, nunits_in))
3201 modifier = NONE;
3202 else if (known_eq (nunits_out * 2, nunits_in))
3203 modifier = WIDEN;
3204 else
3205 return false;
3207 /* We only handle functions that do not read or clobber memory. */
3208 if (gimple_vuse (stmt))
3210 if (dump_enabled_p ())
3211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3212 "function reads from or writes to memory.\n");
3213 return false;
3216 /* For now, we only vectorize functions if a target specific builtin
3217 is available. TODO -- in some cases, it might be profitable to
3218 insert the calls for pieces of the vector, in order to be able
3219 to vectorize other operations in the loop. */
3220 fndecl = NULL_TREE;
3221 internal_fn ifn = IFN_LAST;
3222 tree callee = gimple_call_fndecl (stmt);
3224 /* First try using an internal function. */
3225 tree_code convert_code = ERROR_MARK;
3226 if (cfn != CFN_LAST
3227 && (modifier == NONE
3228 || (modifier == NARROW
3229 && simple_integer_narrowing (vectype_out, vectype_in,
3230 &convert_code))))
3231 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3232 vectype_in);
3234 /* If that fails, try asking for a target-specific built-in function. */
3235 if (ifn == IFN_LAST)
3237 if (cfn != CFN_LAST)
3238 fndecl = targetm.vectorize.builtin_vectorized_function
3239 (cfn, vectype_out, vectype_in);
3240 else if (callee)
3241 fndecl = targetm.vectorize.builtin_md_vectorized_function
3242 (callee, vectype_out, vectype_in);
3245 if (ifn == IFN_LAST && !fndecl)
3247 if (cfn == CFN_GOMP_SIMD_LANE
3248 && !slp_node
3249 && loop_vinfo
3250 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3251 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3252 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3253 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3255 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3256 { 0, 1, 2, ... vf - 1 } vector. */
3257 gcc_assert (nargs == 0);
3259 else if (modifier == NONE
3260 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3261 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3262 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3263 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3264 vectype_in, cost_vec);
3265 else
3267 if (dump_enabled_p ())
3268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3269 "function is not vectorizable.\n");
3270 return false;
3274 if (slp_node)
3275 ncopies = 1;
3276 else if (modifier == NARROW && ifn == IFN_LAST)
3277 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3278 else
3279 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3281 /* Sanity check: make sure that at least one copy of the vectorized stmt
3282 needs to be generated. */
3283 gcc_assert (ncopies >= 1);
3285 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3286 if (!vec_stmt) /* transformation not required. */
3288 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3289 DUMP_VECT_SCOPE ("vectorizable_call");
3290 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3291 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3292 record_stmt_cost (cost_vec, ncopies / 2,
3293 vec_promote_demote, stmt_info, 0, vect_body);
3295 if (loop_vinfo && mask_opno >= 0)
3297 unsigned int nvectors = (slp_node
3298 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3299 : ncopies);
3300 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3302 return true;
3305 /* Transform. */
3307 if (dump_enabled_p ())
3308 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3310 /* Handle def. */
3311 scalar_dest = gimple_call_lhs (stmt);
3312 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3314 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3316 stmt_vec_info new_stmt_info = NULL;
3317 prev_stmt_info = NULL;
3318 if (modifier == NONE || ifn != IFN_LAST)
3320 tree prev_res = NULL_TREE;
3321 vargs.safe_grow (nargs);
3322 orig_vargs.safe_grow (nargs);
3323 for (j = 0; j < ncopies; ++j)
3325 /* Build argument list for the vectorized call. */
3326 if (slp_node)
3328 auto_vec<vec<tree> > vec_defs (nargs);
3329 vec<tree> vec_oprnds0;
3331 for (i = 0; i < nargs; i++)
3332 vargs[i] = gimple_call_arg (stmt, i);
3333 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3334 vec_oprnds0 = vec_defs[0];
3336 /* Arguments are ready. Create the new vector stmt. */
3337 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3339 size_t k;
3340 for (k = 0; k < nargs; k++)
3342 vec<tree> vec_oprndsk = vec_defs[k];
3343 vargs[k] = vec_oprndsk[i];
3345 if (modifier == NARROW)
3347 /* We don't define any narrowing conditional functions
3348 at present. */
3349 gcc_assert (mask_opno < 0);
3350 tree half_res = make_ssa_name (vectype_in);
3351 gcall *call
3352 = gimple_build_call_internal_vec (ifn, vargs);
3353 gimple_call_set_lhs (call, half_res);
3354 gimple_call_set_nothrow (call, true);
3355 new_stmt_info
3356 = vect_finish_stmt_generation (stmt_info, call, gsi);
3357 if ((i & 1) == 0)
3359 prev_res = half_res;
3360 continue;
3362 new_temp = make_ssa_name (vec_dest);
3363 gimple *new_stmt
3364 = gimple_build_assign (new_temp, convert_code,
3365 prev_res, half_res);
3366 new_stmt_info
3367 = vect_finish_stmt_generation (stmt_info, new_stmt,
3368 gsi);
3370 else
3372 if (mask_opno >= 0 && masked_loop_p)
3374 unsigned int vec_num = vec_oprnds0.length ();
3375 /* Always true for SLP. */
3376 gcc_assert (ncopies == 1);
3377 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3378 vectype_out, i);
3379 vargs[mask_opno] = prepare_load_store_mask
3380 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3383 gcall *call;
3384 if (ifn != IFN_LAST)
3385 call = gimple_build_call_internal_vec (ifn, vargs);
3386 else
3387 call = gimple_build_call_vec (fndecl, vargs);
3388 new_temp = make_ssa_name (vec_dest, call);
3389 gimple_call_set_lhs (call, new_temp);
3390 gimple_call_set_nothrow (call, true);
3391 new_stmt_info
3392 = vect_finish_stmt_generation (stmt_info, call, gsi);
3394 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3397 for (i = 0; i < nargs; i++)
3399 vec<tree> vec_oprndsi = vec_defs[i];
3400 vec_oprndsi.release ();
3402 continue;
3405 for (i = 0; i < nargs; i++)
3407 op = gimple_call_arg (stmt, i);
3408 if (j == 0)
3409 vec_oprnd0
3410 = vect_get_vec_def_for_operand (op, stmt_info);
3411 else
3412 vec_oprnd0
3413 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3415 orig_vargs[i] = vargs[i] = vec_oprnd0;
3418 if (mask_opno >= 0 && masked_loop_p)
3420 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3421 vectype_out, j);
3422 vargs[mask_opno]
3423 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3424 vargs[mask_opno], gsi);
3427 if (cfn == CFN_GOMP_SIMD_LANE)
3429 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3430 tree new_var
3431 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3432 gimple *init_stmt = gimple_build_assign (new_var, cst);
3433 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3434 new_temp = make_ssa_name (vec_dest);
3435 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3436 new_stmt_info
3437 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3439 else if (modifier == NARROW)
3441 /* We don't define any narrowing conditional functions at
3442 present. */
3443 gcc_assert (mask_opno < 0);
3444 tree half_res = make_ssa_name (vectype_in);
3445 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3446 gimple_call_set_lhs (call, half_res);
3447 gimple_call_set_nothrow (call, true);
3448 new_stmt_info
3449 = vect_finish_stmt_generation (stmt_info, call, gsi);
3450 if ((j & 1) == 0)
3452 prev_res = half_res;
3453 continue;
3455 new_temp = make_ssa_name (vec_dest);
3456 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3457 prev_res, half_res);
3458 new_stmt_info
3459 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3461 else
3463 gcall *call;
3464 if (ifn != IFN_LAST)
3465 call = gimple_build_call_internal_vec (ifn, vargs);
3466 else
3467 call = gimple_build_call_vec (fndecl, vargs);
3468 new_temp = make_ssa_name (vec_dest, call);
3469 gimple_call_set_lhs (call, new_temp);
3470 gimple_call_set_nothrow (call, true);
3471 new_stmt_info
3472 = vect_finish_stmt_generation (stmt_info, call, gsi);
3475 if (j == (modifier == NARROW ? 1 : 0))
3476 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3477 else
3478 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3480 prev_stmt_info = new_stmt_info;
3483 else if (modifier == NARROW)
3485 /* We don't define any narrowing conditional functions at present. */
3486 gcc_assert (mask_opno < 0);
3487 for (j = 0; j < ncopies; ++j)
3489 /* Build argument list for the vectorized call. */
3490 if (j == 0)
3491 vargs.create (nargs * 2);
3492 else
3493 vargs.truncate (0);
3495 if (slp_node)
3497 auto_vec<vec<tree> > vec_defs (nargs);
3498 vec<tree> vec_oprnds0;
3500 for (i = 0; i < nargs; i++)
3501 vargs.quick_push (gimple_call_arg (stmt, i));
3502 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3503 vec_oprnds0 = vec_defs[0];
3505 /* Arguments are ready. Create the new vector stmt. */
3506 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3508 size_t k;
3509 vargs.truncate (0);
3510 for (k = 0; k < nargs; k++)
3512 vec<tree> vec_oprndsk = vec_defs[k];
3513 vargs.quick_push (vec_oprndsk[i]);
3514 vargs.quick_push (vec_oprndsk[i + 1]);
3516 gcall *call;
3517 if (ifn != IFN_LAST)
3518 call = gimple_build_call_internal_vec (ifn, vargs);
3519 else
3520 call = gimple_build_call_vec (fndecl, vargs);
3521 new_temp = make_ssa_name (vec_dest, call);
3522 gimple_call_set_lhs (call, new_temp);
3523 gimple_call_set_nothrow (call, true);
3524 new_stmt_info
3525 = vect_finish_stmt_generation (stmt_info, call, gsi);
3526 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3529 for (i = 0; i < nargs; i++)
3531 vec<tree> vec_oprndsi = vec_defs[i];
3532 vec_oprndsi.release ();
3534 continue;
3537 for (i = 0; i < nargs; i++)
3539 op = gimple_call_arg (stmt, i);
3540 if (j == 0)
3542 vec_oprnd0
3543 = vect_get_vec_def_for_operand (op, stmt_info);
3544 vec_oprnd1
3545 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3547 else
3549 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3550 2 * i + 1);
3551 vec_oprnd0
3552 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3553 vec_oprnd1
3554 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3557 vargs.quick_push (vec_oprnd0);
3558 vargs.quick_push (vec_oprnd1);
3561 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3562 new_temp = make_ssa_name (vec_dest, new_stmt);
3563 gimple_call_set_lhs (new_stmt, new_temp);
3564 new_stmt_info
3565 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3567 if (j == 0)
3568 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3569 else
3570 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3572 prev_stmt_info = new_stmt_info;
3575 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3577 else
3578 /* No current target implements this case. */
3579 return false;
3581 vargs.release ();
3583 /* The call in STMT might prevent it from being removed in dce.
3584 We however cannot remove it here, due to the way the ssa name
3585 it defines is mapped to the new definition. So just replace
3586 rhs of the statement with something harmless. */
3588 if (slp_node)
3589 return true;
3591 stmt_info = vect_orig_stmt (stmt_info);
3592 lhs = gimple_get_lhs (stmt_info->stmt);
3594 gassign *new_stmt
3595 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3596 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3598 return true;
3602 struct simd_call_arg_info
3604 tree vectype;
3605 tree op;
3606 HOST_WIDE_INT linear_step;
3607 enum vect_def_type dt;
3608 unsigned int align;
3609 bool simd_lane_linear;
3612 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3613 is linear within simd lane (but not within whole loop), note it in
3614 *ARGINFO. */
3616 static void
3617 vect_simd_lane_linear (tree op, struct loop *loop,
3618 struct simd_call_arg_info *arginfo)
3620 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3622 if (!is_gimple_assign (def_stmt)
3623 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3624 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3625 return;
3627 tree base = gimple_assign_rhs1 (def_stmt);
3628 HOST_WIDE_INT linear_step = 0;
3629 tree v = gimple_assign_rhs2 (def_stmt);
3630 while (TREE_CODE (v) == SSA_NAME)
3632 tree t;
3633 def_stmt = SSA_NAME_DEF_STMT (v);
3634 if (is_gimple_assign (def_stmt))
3635 switch (gimple_assign_rhs_code (def_stmt))
3637 case PLUS_EXPR:
3638 t = gimple_assign_rhs2 (def_stmt);
3639 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3640 return;
3641 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3642 v = gimple_assign_rhs1 (def_stmt);
3643 continue;
3644 case MULT_EXPR:
3645 t = gimple_assign_rhs2 (def_stmt);
3646 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3647 return;
3648 linear_step = tree_to_shwi (t);
3649 v = gimple_assign_rhs1 (def_stmt);
3650 continue;
3651 CASE_CONVERT:
3652 t = gimple_assign_rhs1 (def_stmt);
3653 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3654 || (TYPE_PRECISION (TREE_TYPE (v))
3655 < TYPE_PRECISION (TREE_TYPE (t))))
3656 return;
3657 if (!linear_step)
3658 linear_step = 1;
3659 v = t;
3660 continue;
3661 default:
3662 return;
3664 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3665 && loop->simduid
3666 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3667 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3668 == loop->simduid))
3670 if (!linear_step)
3671 linear_step = 1;
3672 arginfo->linear_step = linear_step;
3673 arginfo->op = base;
3674 arginfo->simd_lane_linear = true;
3675 return;
3680 /* Return the number of elements in vector type VECTYPE, which is associated
3681 with a SIMD clone. At present these vectors always have a constant
3682 length. */
3684 static unsigned HOST_WIDE_INT
3685 simd_clone_subparts (tree vectype)
3687 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3690 /* Function vectorizable_simd_clone_call.
3692 Check if STMT_INFO performs a function call that can be vectorized
3693 by calling a simd clone of the function.
3694 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3695 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3696 Return true if STMT_INFO is vectorizable in this way. */
3698 static bool
3699 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3700 gimple_stmt_iterator *gsi,
3701 stmt_vec_info *vec_stmt, slp_tree slp_node,
3702 stmt_vector_for_cost *)
3704 tree vec_dest;
3705 tree scalar_dest;
3706 tree op, type;
3707 tree vec_oprnd0 = NULL_TREE;
3708 stmt_vec_info prev_stmt_info;
3709 tree vectype;
3710 unsigned int nunits;
3711 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3712 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3713 vec_info *vinfo = stmt_info->vinfo;
3714 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3715 tree fndecl, new_temp;
3716 int ncopies, j;
3717 auto_vec<simd_call_arg_info> arginfo;
3718 vec<tree> vargs = vNULL;
3719 size_t i, nargs;
3720 tree lhs, rtype, ratype;
3721 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3723 /* Is STMT a vectorizable call? */
3724 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3725 if (!stmt)
3726 return false;
3728 fndecl = gimple_call_fndecl (stmt);
3729 if (fndecl == NULL_TREE)
3730 return false;
3732 struct cgraph_node *node = cgraph_node::get (fndecl);
3733 if (node == NULL || node->simd_clones == NULL)
3734 return false;
3736 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3737 return false;
3739 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3740 && ! vec_stmt)
3741 return false;
3743 if (gimple_call_lhs (stmt)
3744 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3745 return false;
3747 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3749 vectype = STMT_VINFO_VECTYPE (stmt_info);
3751 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3752 return false;
3754 /* FORNOW */
3755 if (slp_node)
3756 return false;
3758 /* Process function arguments. */
3759 nargs = gimple_call_num_args (stmt);
3761 /* Bail out if the function has zero arguments. */
3762 if (nargs == 0)
3763 return false;
3765 arginfo.reserve (nargs, true);
3767 for (i = 0; i < nargs; i++)
3769 simd_call_arg_info thisarginfo;
3770 affine_iv iv;
3772 thisarginfo.linear_step = 0;
3773 thisarginfo.align = 0;
3774 thisarginfo.op = NULL_TREE;
3775 thisarginfo.simd_lane_linear = false;
3777 op = gimple_call_arg (stmt, i);
3778 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3779 &thisarginfo.vectype)
3780 || thisarginfo.dt == vect_uninitialized_def)
3782 if (dump_enabled_p ())
3783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3784 "use not simple.\n");
3785 return false;
3788 if (thisarginfo.dt == vect_constant_def
3789 || thisarginfo.dt == vect_external_def)
3790 gcc_assert (thisarginfo.vectype == NULL_TREE);
3791 else
3792 gcc_assert (thisarginfo.vectype != NULL_TREE);
3794 /* For linear arguments, the analyze phase should have saved
3795 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3796 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3797 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3799 gcc_assert (vec_stmt);
3800 thisarginfo.linear_step
3801 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3802 thisarginfo.op
3803 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3804 thisarginfo.simd_lane_linear
3805 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3806 == boolean_true_node);
3807 /* If loop has been peeled for alignment, we need to adjust it. */
3808 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3809 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3810 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3812 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3813 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3814 tree opt = TREE_TYPE (thisarginfo.op);
3815 bias = fold_convert (TREE_TYPE (step), bias);
3816 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3817 thisarginfo.op
3818 = fold_build2 (POINTER_TYPE_P (opt)
3819 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3820 thisarginfo.op, bias);
3823 else if (!vec_stmt
3824 && thisarginfo.dt != vect_constant_def
3825 && thisarginfo.dt != vect_external_def
3826 && loop_vinfo
3827 && TREE_CODE (op) == SSA_NAME
3828 && simple_iv (loop, loop_containing_stmt (stmt), op,
3829 &iv, false)
3830 && tree_fits_shwi_p (iv.step))
3832 thisarginfo.linear_step = tree_to_shwi (iv.step);
3833 thisarginfo.op = iv.base;
3835 else if ((thisarginfo.dt == vect_constant_def
3836 || thisarginfo.dt == vect_external_def)
3837 && POINTER_TYPE_P (TREE_TYPE (op)))
3838 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3839 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3840 linear too. */
3841 if (POINTER_TYPE_P (TREE_TYPE (op))
3842 && !thisarginfo.linear_step
3843 && !vec_stmt
3844 && thisarginfo.dt != vect_constant_def
3845 && thisarginfo.dt != vect_external_def
3846 && loop_vinfo
3847 && !slp_node
3848 && TREE_CODE (op) == SSA_NAME)
3849 vect_simd_lane_linear (op, loop, &thisarginfo);
3851 arginfo.quick_push (thisarginfo);
3854 unsigned HOST_WIDE_INT vf;
3855 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3857 if (dump_enabled_p ())
3858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3859 "not considering SIMD clones; not yet supported"
3860 " for variable-width vectors.\n");
3861 return false;
3864 unsigned int badness = 0;
3865 struct cgraph_node *bestn = NULL;
3866 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3867 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3868 else
3869 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3870 n = n->simdclone->next_clone)
3872 unsigned int this_badness = 0;
3873 if (n->simdclone->simdlen > vf
3874 || n->simdclone->nargs != nargs)
3875 continue;
3876 if (n->simdclone->simdlen < vf)
3877 this_badness += (exact_log2 (vf)
3878 - exact_log2 (n->simdclone->simdlen)) * 1024;
3879 if (n->simdclone->inbranch)
3880 this_badness += 2048;
3881 int target_badness = targetm.simd_clone.usable (n);
3882 if (target_badness < 0)
3883 continue;
3884 this_badness += target_badness * 512;
3885 /* FORNOW: Have to add code to add the mask argument. */
3886 if (n->simdclone->inbranch)
3887 continue;
3888 for (i = 0; i < nargs; i++)
3890 switch (n->simdclone->args[i].arg_type)
3892 case SIMD_CLONE_ARG_TYPE_VECTOR:
3893 if (!useless_type_conversion_p
3894 (n->simdclone->args[i].orig_type,
3895 TREE_TYPE (gimple_call_arg (stmt, i))))
3896 i = -1;
3897 else if (arginfo[i].dt == vect_constant_def
3898 || arginfo[i].dt == vect_external_def
3899 || arginfo[i].linear_step)
3900 this_badness += 64;
3901 break;
3902 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3903 if (arginfo[i].dt != vect_constant_def
3904 && arginfo[i].dt != vect_external_def)
3905 i = -1;
3906 break;
3907 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3908 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3909 if (arginfo[i].dt == vect_constant_def
3910 || arginfo[i].dt == vect_external_def
3911 || (arginfo[i].linear_step
3912 != n->simdclone->args[i].linear_step))
3913 i = -1;
3914 break;
3915 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3916 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3917 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3918 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3919 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3920 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3921 /* FORNOW */
3922 i = -1;
3923 break;
3924 case SIMD_CLONE_ARG_TYPE_MASK:
3925 gcc_unreachable ();
3927 if (i == (size_t) -1)
3928 break;
3929 if (n->simdclone->args[i].alignment > arginfo[i].align)
3931 i = -1;
3932 break;
3934 if (arginfo[i].align)
3935 this_badness += (exact_log2 (arginfo[i].align)
3936 - exact_log2 (n->simdclone->args[i].alignment));
3938 if (i == (size_t) -1)
3939 continue;
3940 if (bestn == NULL || this_badness < badness)
3942 bestn = n;
3943 badness = this_badness;
3947 if (bestn == NULL)
3948 return false;
3950 for (i = 0; i < nargs; i++)
3951 if ((arginfo[i].dt == vect_constant_def
3952 || arginfo[i].dt == vect_external_def)
3953 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3955 arginfo[i].vectype
3956 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3957 i)));
3958 if (arginfo[i].vectype == NULL
3959 || (simd_clone_subparts (arginfo[i].vectype)
3960 > bestn->simdclone->simdlen))
3961 return false;
3964 fndecl = bestn->decl;
3965 nunits = bestn->simdclone->simdlen;
3966 ncopies = vf / nunits;
3968 /* If the function isn't const, only allow it in simd loops where user
3969 has asserted that at least nunits consecutive iterations can be
3970 performed using SIMD instructions. */
3971 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3972 && gimple_vuse (stmt))
3973 return false;
3975 /* Sanity check: make sure that at least one copy of the vectorized stmt
3976 needs to be generated. */
3977 gcc_assert (ncopies >= 1);
3979 if (!vec_stmt) /* transformation not required. */
3981 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3982 for (i = 0; i < nargs; i++)
3983 if ((bestn->simdclone->args[i].arg_type
3984 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3985 || (bestn->simdclone->args[i].arg_type
3986 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3988 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3989 + 1);
3990 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3991 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3992 ? size_type_node : TREE_TYPE (arginfo[i].op);
3993 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3994 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3995 tree sll = arginfo[i].simd_lane_linear
3996 ? boolean_true_node : boolean_false_node;
3997 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3999 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4000 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4001 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4002 return true;
4005 /* Transform. */
4007 if (dump_enabled_p ())
4008 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4010 /* Handle def. */
4011 scalar_dest = gimple_call_lhs (stmt);
4012 vec_dest = NULL_TREE;
4013 rtype = NULL_TREE;
4014 ratype = NULL_TREE;
4015 if (scalar_dest)
4017 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4018 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4019 if (TREE_CODE (rtype) == ARRAY_TYPE)
4021 ratype = rtype;
4022 rtype = TREE_TYPE (ratype);
4026 prev_stmt_info = NULL;
4027 for (j = 0; j < ncopies; ++j)
4029 /* Build argument list for the vectorized call. */
4030 if (j == 0)
4031 vargs.create (nargs);
4032 else
4033 vargs.truncate (0);
4035 for (i = 0; i < nargs; i++)
4037 unsigned int k, l, m, o;
4038 tree atype;
4039 op = gimple_call_arg (stmt, i);
4040 switch (bestn->simdclone->args[i].arg_type)
4042 case SIMD_CLONE_ARG_TYPE_VECTOR:
4043 atype = bestn->simdclone->args[i].vector_type;
4044 o = nunits / simd_clone_subparts (atype);
4045 for (m = j * o; m < (j + 1) * o; m++)
4047 if (simd_clone_subparts (atype)
4048 < simd_clone_subparts (arginfo[i].vectype))
4050 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4051 k = (simd_clone_subparts (arginfo[i].vectype)
4052 / simd_clone_subparts (atype));
4053 gcc_assert ((k & (k - 1)) == 0);
4054 if (m == 0)
4055 vec_oprnd0
4056 = vect_get_vec_def_for_operand (op, stmt_info);
4057 else
4059 vec_oprnd0 = arginfo[i].op;
4060 if ((m & (k - 1)) == 0)
4061 vec_oprnd0
4062 = vect_get_vec_def_for_stmt_copy (vinfo,
4063 vec_oprnd0);
4065 arginfo[i].op = vec_oprnd0;
4066 vec_oprnd0
4067 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4068 bitsize_int (prec),
4069 bitsize_int ((m & (k - 1)) * prec));
4070 gassign *new_stmt
4071 = gimple_build_assign (make_ssa_name (atype),
4072 vec_oprnd0);
4073 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4074 vargs.safe_push (gimple_assign_lhs (new_stmt));
4076 else
4078 k = (simd_clone_subparts (atype)
4079 / simd_clone_subparts (arginfo[i].vectype));
4080 gcc_assert ((k & (k - 1)) == 0);
4081 vec<constructor_elt, va_gc> *ctor_elts;
4082 if (k != 1)
4083 vec_alloc (ctor_elts, k);
4084 else
4085 ctor_elts = NULL;
4086 for (l = 0; l < k; l++)
4088 if (m == 0 && l == 0)
4089 vec_oprnd0
4090 = vect_get_vec_def_for_operand (op, stmt_info);
4091 else
4092 vec_oprnd0
4093 = vect_get_vec_def_for_stmt_copy (vinfo,
4094 arginfo[i].op);
4095 arginfo[i].op = vec_oprnd0;
4096 if (k == 1)
4097 break;
4098 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4099 vec_oprnd0);
4101 if (k == 1)
4102 vargs.safe_push (vec_oprnd0);
4103 else
4105 vec_oprnd0 = build_constructor (atype, ctor_elts);
4106 gassign *new_stmt
4107 = gimple_build_assign (make_ssa_name (atype),
4108 vec_oprnd0);
4109 vect_finish_stmt_generation (stmt_info, new_stmt,
4110 gsi);
4111 vargs.safe_push (gimple_assign_lhs (new_stmt));
4115 break;
4116 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4117 vargs.safe_push (op);
4118 break;
4119 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4120 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4121 if (j == 0)
4123 gimple_seq stmts;
4124 arginfo[i].op
4125 = force_gimple_operand (arginfo[i].op, &stmts, true,
4126 NULL_TREE);
4127 if (stmts != NULL)
4129 basic_block new_bb;
4130 edge pe = loop_preheader_edge (loop);
4131 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4132 gcc_assert (!new_bb);
4134 if (arginfo[i].simd_lane_linear)
4136 vargs.safe_push (arginfo[i].op);
4137 break;
4139 tree phi_res = copy_ssa_name (op);
4140 gphi *new_phi = create_phi_node (phi_res, loop->header);
4141 loop_vinfo->add_stmt (new_phi);
4142 add_phi_arg (new_phi, arginfo[i].op,
4143 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4144 enum tree_code code
4145 = POINTER_TYPE_P (TREE_TYPE (op))
4146 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4147 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4148 ? sizetype : TREE_TYPE (op);
4149 widest_int cst
4150 = wi::mul (bestn->simdclone->args[i].linear_step,
4151 ncopies * nunits);
4152 tree tcst = wide_int_to_tree (type, cst);
4153 tree phi_arg = copy_ssa_name (op);
4154 gassign *new_stmt
4155 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4156 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4157 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4158 loop_vinfo->add_stmt (new_stmt);
4159 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4160 UNKNOWN_LOCATION);
4161 arginfo[i].op = phi_res;
4162 vargs.safe_push (phi_res);
4164 else
4166 enum tree_code code
4167 = POINTER_TYPE_P (TREE_TYPE (op))
4168 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4169 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4170 ? sizetype : TREE_TYPE (op);
4171 widest_int cst
4172 = wi::mul (bestn->simdclone->args[i].linear_step,
4173 j * nunits);
4174 tree tcst = wide_int_to_tree (type, cst);
4175 new_temp = make_ssa_name (TREE_TYPE (op));
4176 gassign *new_stmt
4177 = gimple_build_assign (new_temp, code,
4178 arginfo[i].op, tcst);
4179 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4180 vargs.safe_push (new_temp);
4182 break;
4183 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4185 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4186 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4187 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4188 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4189 default:
4190 gcc_unreachable ();
4194 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4195 if (vec_dest)
4197 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4198 if (ratype)
4199 new_temp = create_tmp_var (ratype);
4200 else if (simd_clone_subparts (vectype)
4201 == simd_clone_subparts (rtype))
4202 new_temp = make_ssa_name (vec_dest, new_call);
4203 else
4204 new_temp = make_ssa_name (rtype, new_call);
4205 gimple_call_set_lhs (new_call, new_temp);
4207 stmt_vec_info new_stmt_info
4208 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4210 if (vec_dest)
4212 if (simd_clone_subparts (vectype) < nunits)
4214 unsigned int k, l;
4215 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4216 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4217 k = nunits / simd_clone_subparts (vectype);
4218 gcc_assert ((k & (k - 1)) == 0);
4219 for (l = 0; l < k; l++)
4221 tree t;
4222 if (ratype)
4224 t = build_fold_addr_expr (new_temp);
4225 t = build2 (MEM_REF, vectype, t,
4226 build_int_cst (TREE_TYPE (t), l * bytes));
4228 else
4229 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4230 bitsize_int (prec), bitsize_int (l * prec));
4231 gimple *new_stmt
4232 = gimple_build_assign (make_ssa_name (vectype), t);
4233 new_stmt_info
4234 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4236 if (j == 0 && l == 0)
4237 STMT_VINFO_VEC_STMT (stmt_info)
4238 = *vec_stmt = new_stmt_info;
4239 else
4240 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4242 prev_stmt_info = new_stmt_info;
4245 if (ratype)
4246 vect_clobber_variable (stmt_info, gsi, new_temp);
4247 continue;
4249 else if (simd_clone_subparts (vectype) > nunits)
4251 unsigned int k = (simd_clone_subparts (vectype)
4252 / simd_clone_subparts (rtype));
4253 gcc_assert ((k & (k - 1)) == 0);
4254 if ((j & (k - 1)) == 0)
4255 vec_alloc (ret_ctor_elts, k);
4256 if (ratype)
4258 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4259 for (m = 0; m < o; m++)
4261 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4262 size_int (m), NULL_TREE, NULL_TREE);
4263 gimple *new_stmt
4264 = gimple_build_assign (make_ssa_name (rtype), tem);
4265 new_stmt_info
4266 = vect_finish_stmt_generation (stmt_info, new_stmt,
4267 gsi);
4268 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4269 gimple_assign_lhs (new_stmt));
4271 vect_clobber_variable (stmt_info, gsi, new_temp);
4273 else
4274 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4275 if ((j & (k - 1)) != k - 1)
4276 continue;
4277 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4278 gimple *new_stmt
4279 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4280 new_stmt_info
4281 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4283 if ((unsigned) j == k - 1)
4284 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4285 else
4286 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4288 prev_stmt_info = new_stmt_info;
4289 continue;
4291 else if (ratype)
4293 tree t = build_fold_addr_expr (new_temp);
4294 t = build2 (MEM_REF, vectype, t,
4295 build_int_cst (TREE_TYPE (t), 0));
4296 gimple *new_stmt
4297 = gimple_build_assign (make_ssa_name (vec_dest), t);
4298 new_stmt_info
4299 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4300 vect_clobber_variable (stmt_info, gsi, new_temp);
4304 if (j == 0)
4305 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4306 else
4307 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4309 prev_stmt_info = new_stmt_info;
4312 vargs.release ();
4314 /* The call in STMT might prevent it from being removed in dce.
4315 We however cannot remove it here, due to the way the ssa name
4316 it defines is mapped to the new definition. So just replace
4317 rhs of the statement with something harmless. */
4319 if (slp_node)
4320 return true;
4322 gimple *new_stmt;
4323 if (scalar_dest)
4325 type = TREE_TYPE (scalar_dest);
4326 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4327 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4329 else
4330 new_stmt = gimple_build_nop ();
4331 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4332 unlink_stmt_vdef (stmt);
4334 return true;
4338 /* Function vect_gen_widened_results_half
4340 Create a vector stmt whose code, type, number of arguments, and result
4341 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4342 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
4343 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4344 needs to be created (DECL is a function-decl of a target-builtin).
4345 STMT_INFO is the original scalar stmt that we are vectorizing. */
4347 static gimple *
4348 vect_gen_widened_results_half (enum tree_code code,
4349 tree decl,
4350 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4351 tree vec_dest, gimple_stmt_iterator *gsi,
4352 stmt_vec_info stmt_info)
4354 gimple *new_stmt;
4355 tree new_temp;
4357 /* Generate half of the widened result: */
4358 if (code == CALL_EXPR)
4360 /* Target specific support */
4361 if (op_type == binary_op)
4362 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4363 else
4364 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4365 new_temp = make_ssa_name (vec_dest, new_stmt);
4366 gimple_call_set_lhs (new_stmt, new_temp);
4368 else
4370 /* Generic support */
4371 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4372 if (op_type != binary_op)
4373 vec_oprnd1 = NULL;
4374 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4375 new_temp = make_ssa_name (vec_dest, new_stmt);
4376 gimple_assign_set_lhs (new_stmt, new_temp);
4378 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4380 return new_stmt;
4384 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4385 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4386 containing scalar operand), and for the rest we get a copy with
4387 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4388 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4389 The vectors are collected into VEC_OPRNDS. */
4391 static void
4392 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4393 vec<tree> *vec_oprnds, int multi_step_cvt)
4395 vec_info *vinfo = stmt_info->vinfo;
4396 tree vec_oprnd;
4398 /* Get first vector operand. */
4399 /* All the vector operands except the very first one (that is scalar oprnd)
4400 are stmt copies. */
4401 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4402 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4403 else
4404 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4406 vec_oprnds->quick_push (vec_oprnd);
4408 /* Get second vector operand. */
4409 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4410 vec_oprnds->quick_push (vec_oprnd);
4412 *oprnd = vec_oprnd;
4414 /* For conversion in multiple steps, continue to get operands
4415 recursively. */
4416 if (multi_step_cvt)
4417 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4418 multi_step_cvt - 1);
4422 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4423 For multi-step conversions store the resulting vectors and call the function
4424 recursively. */
4426 static void
4427 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4428 int multi_step_cvt,
4429 stmt_vec_info stmt_info,
4430 vec<tree> vec_dsts,
4431 gimple_stmt_iterator *gsi,
4432 slp_tree slp_node, enum tree_code code,
4433 stmt_vec_info *prev_stmt_info)
4435 unsigned int i;
4436 tree vop0, vop1, new_tmp, vec_dest;
4438 vec_dest = vec_dsts.pop ();
4440 for (i = 0; i < vec_oprnds->length (); i += 2)
4442 /* Create demotion operation. */
4443 vop0 = (*vec_oprnds)[i];
4444 vop1 = (*vec_oprnds)[i + 1];
4445 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4446 new_tmp = make_ssa_name (vec_dest, new_stmt);
4447 gimple_assign_set_lhs (new_stmt, new_tmp);
4448 stmt_vec_info new_stmt_info
4449 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4451 if (multi_step_cvt)
4452 /* Store the resulting vector for next recursive call. */
4453 (*vec_oprnds)[i/2] = new_tmp;
4454 else
4456 /* This is the last step of the conversion sequence. Store the
4457 vectors in SLP_NODE or in vector info of the scalar statement
4458 (or in STMT_VINFO_RELATED_STMT chain). */
4459 if (slp_node)
4460 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4461 else
4463 if (!*prev_stmt_info)
4464 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4465 else
4466 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4468 *prev_stmt_info = new_stmt_info;
4473 /* For multi-step demotion operations we first generate demotion operations
4474 from the source type to the intermediate types, and then combine the
4475 results (stored in VEC_OPRNDS) in demotion operation to the destination
4476 type. */
4477 if (multi_step_cvt)
4479 /* At each level of recursion we have half of the operands we had at the
4480 previous level. */
4481 vec_oprnds->truncate ((i+1)/2);
4482 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4483 stmt_info, vec_dsts, gsi,
4484 slp_node, VEC_PACK_TRUNC_EXPR,
4485 prev_stmt_info);
4488 vec_dsts.quick_push (vec_dest);
4492 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4493 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4494 STMT_INFO. For multi-step conversions store the resulting vectors and
4495 call the function recursively. */
4497 static void
4498 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4499 vec<tree> *vec_oprnds1,
4500 stmt_vec_info stmt_info, tree vec_dest,
4501 gimple_stmt_iterator *gsi,
4502 enum tree_code code1,
4503 enum tree_code code2, tree decl1,
4504 tree decl2, int op_type)
4506 int i;
4507 tree vop0, vop1, new_tmp1, new_tmp2;
4508 gimple *new_stmt1, *new_stmt2;
4509 vec<tree> vec_tmp = vNULL;
4511 vec_tmp.create (vec_oprnds0->length () * 2);
4512 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4514 if (op_type == binary_op)
4515 vop1 = (*vec_oprnds1)[i];
4516 else
4517 vop1 = NULL_TREE;
4519 /* Generate the two halves of promotion operation. */
4520 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4521 op_type, vec_dest, gsi,
4522 stmt_info);
4523 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4524 op_type, vec_dest, gsi,
4525 stmt_info);
4526 if (is_gimple_call (new_stmt1))
4528 new_tmp1 = gimple_call_lhs (new_stmt1);
4529 new_tmp2 = gimple_call_lhs (new_stmt2);
4531 else
4533 new_tmp1 = gimple_assign_lhs (new_stmt1);
4534 new_tmp2 = gimple_assign_lhs (new_stmt2);
4537 /* Store the results for the next step. */
4538 vec_tmp.quick_push (new_tmp1);
4539 vec_tmp.quick_push (new_tmp2);
4542 vec_oprnds0->release ();
4543 *vec_oprnds0 = vec_tmp;
4547 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4548 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4549 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4550 Return true if STMT_INFO is vectorizable in this way. */
4552 static bool
4553 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4554 stmt_vec_info *vec_stmt, slp_tree slp_node,
4555 stmt_vector_for_cost *cost_vec)
4557 tree vec_dest;
4558 tree scalar_dest;
4559 tree op0, op1 = NULL_TREE;
4560 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4561 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4562 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4563 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4564 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4565 tree new_temp;
4566 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4567 int ndts = 2;
4568 stmt_vec_info prev_stmt_info;
4569 poly_uint64 nunits_in;
4570 poly_uint64 nunits_out;
4571 tree vectype_out, vectype_in;
4572 int ncopies, i, j;
4573 tree lhs_type, rhs_type;
4574 enum { NARROW, NONE, WIDEN } modifier;
4575 vec<tree> vec_oprnds0 = vNULL;
4576 vec<tree> vec_oprnds1 = vNULL;
4577 tree vop0;
4578 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4579 vec_info *vinfo = stmt_info->vinfo;
4580 int multi_step_cvt = 0;
4581 vec<tree> interm_types = vNULL;
4582 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4583 int op_type;
4584 unsigned short fltsz;
4586 /* Is STMT a vectorizable conversion? */
4588 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4589 return false;
4591 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4592 && ! vec_stmt)
4593 return false;
4595 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4596 if (!stmt)
4597 return false;
4599 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4600 return false;
4602 code = gimple_assign_rhs_code (stmt);
4603 if (!CONVERT_EXPR_CODE_P (code)
4604 && code != FIX_TRUNC_EXPR
4605 && code != FLOAT_EXPR
4606 && code != WIDEN_MULT_EXPR
4607 && code != WIDEN_LSHIFT_EXPR)
4608 return false;
4610 op_type = TREE_CODE_LENGTH (code);
4612 /* Check types of lhs and rhs. */
4613 scalar_dest = gimple_assign_lhs (stmt);
4614 lhs_type = TREE_TYPE (scalar_dest);
4615 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4617 op0 = gimple_assign_rhs1 (stmt);
4618 rhs_type = TREE_TYPE (op0);
4620 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4621 && !((INTEGRAL_TYPE_P (lhs_type)
4622 && INTEGRAL_TYPE_P (rhs_type))
4623 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4624 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4625 return false;
4627 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4628 && ((INTEGRAL_TYPE_P (lhs_type)
4629 && !type_has_mode_precision_p (lhs_type))
4630 || (INTEGRAL_TYPE_P (rhs_type)
4631 && !type_has_mode_precision_p (rhs_type))))
4633 if (dump_enabled_p ())
4634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4635 "type conversion to/from bit-precision unsupported."
4636 "\n");
4637 return false;
4640 /* Check the operands of the operation. */
4641 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4643 if (dump_enabled_p ())
4644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4645 "use not simple.\n");
4646 return false;
4648 if (op_type == binary_op)
4650 bool ok;
4652 op1 = gimple_assign_rhs2 (stmt);
4653 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4654 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4655 OP1. */
4656 if (CONSTANT_CLASS_P (op0))
4657 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4658 else
4659 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4661 if (!ok)
4663 if (dump_enabled_p ())
4664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4665 "use not simple.\n");
4666 return false;
4670 /* If op0 is an external or constant defs use a vector type of
4671 the same size as the output vector type. */
4672 if (!vectype_in)
4673 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4674 if (vec_stmt)
4675 gcc_assert (vectype_in);
4676 if (!vectype_in)
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4680 "no vectype for scalar type %T\n", rhs_type);
4682 return false;
4685 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4686 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4690 "can't convert between boolean and non "
4691 "boolean vectors %T\n", rhs_type);
4693 return false;
4696 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4697 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4698 if (known_eq (nunits_out, nunits_in))
4699 modifier = NONE;
4700 else if (multiple_p (nunits_out, nunits_in))
4701 modifier = NARROW;
4702 else
4704 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4705 modifier = WIDEN;
4708 /* Multiple types in SLP are handled by creating the appropriate number of
4709 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4710 case of SLP. */
4711 if (slp_node)
4712 ncopies = 1;
4713 else if (modifier == NARROW)
4714 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4715 else
4716 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4718 /* Sanity check: make sure that at least one copy of the vectorized stmt
4719 needs to be generated. */
4720 gcc_assert (ncopies >= 1);
4722 bool found_mode = false;
4723 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4724 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4725 opt_scalar_mode rhs_mode_iter;
4727 /* Supportable by target? */
4728 switch (modifier)
4730 case NONE:
4731 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4732 return false;
4733 if (supportable_convert_operation (code, vectype_out, vectype_in,
4734 &decl1, &code1))
4735 break;
4736 /* FALLTHRU */
4737 unsupported:
4738 if (dump_enabled_p ())
4739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4740 "conversion not supported by target.\n");
4741 return false;
4743 case WIDEN:
4744 if (supportable_widening_operation (code, stmt_info, vectype_out,
4745 vectype_in, &code1, &code2,
4746 &multi_step_cvt, &interm_types))
4748 /* Binary widening operation can only be supported directly by the
4749 architecture. */
4750 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4751 break;
4754 if (code != FLOAT_EXPR
4755 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4756 goto unsupported;
4758 fltsz = GET_MODE_SIZE (lhs_mode);
4759 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4761 rhs_mode = rhs_mode_iter.require ();
4762 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4763 break;
4765 cvt_type
4766 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4767 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4768 if (cvt_type == NULL_TREE)
4769 goto unsupported;
4771 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4773 if (!supportable_convert_operation (code, vectype_out,
4774 cvt_type, &decl1, &codecvt1))
4775 goto unsupported;
4777 else if (!supportable_widening_operation (code, stmt_info,
4778 vectype_out, cvt_type,
4779 &codecvt1, &codecvt2,
4780 &multi_step_cvt,
4781 &interm_types))
4782 continue;
4783 else
4784 gcc_assert (multi_step_cvt == 0);
4786 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4787 vectype_in, &code1, &code2,
4788 &multi_step_cvt, &interm_types))
4790 found_mode = true;
4791 break;
4795 if (!found_mode)
4796 goto unsupported;
4798 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4799 codecvt2 = ERROR_MARK;
4800 else
4802 multi_step_cvt++;
4803 interm_types.safe_push (cvt_type);
4804 cvt_type = NULL_TREE;
4806 break;
4808 case NARROW:
4809 gcc_assert (op_type == unary_op);
4810 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4811 &code1, &multi_step_cvt,
4812 &interm_types))
4813 break;
4815 if (code != FIX_TRUNC_EXPR
4816 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4817 goto unsupported;
4819 cvt_type
4820 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4821 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4822 if (cvt_type == NULL_TREE)
4823 goto unsupported;
4824 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4825 &decl1, &codecvt1))
4826 goto unsupported;
4827 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4828 &code1, &multi_step_cvt,
4829 &interm_types))
4830 break;
4831 goto unsupported;
4833 default:
4834 gcc_unreachable ();
4837 if (!vec_stmt) /* transformation not required. */
4839 DUMP_VECT_SCOPE ("vectorizable_conversion");
4840 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4842 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4843 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4844 cost_vec);
4846 else if (modifier == NARROW)
4848 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4849 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4850 cost_vec);
4852 else
4854 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4855 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4856 cost_vec);
4858 interm_types.release ();
4859 return true;
4862 /* Transform. */
4863 if (dump_enabled_p ())
4864 dump_printf_loc (MSG_NOTE, vect_location,
4865 "transform conversion. ncopies = %d.\n", ncopies);
4867 if (op_type == binary_op)
4869 if (CONSTANT_CLASS_P (op0))
4870 op0 = fold_convert (TREE_TYPE (op1), op0);
4871 else if (CONSTANT_CLASS_P (op1))
4872 op1 = fold_convert (TREE_TYPE (op0), op1);
4875 /* In case of multi-step conversion, we first generate conversion operations
4876 to the intermediate types, and then from that types to the final one.
4877 We create vector destinations for the intermediate type (TYPES) received
4878 from supportable_*_operation, and store them in the correct order
4879 for future use in vect_create_vectorized_*_stmts (). */
4880 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4881 vec_dest = vect_create_destination_var (scalar_dest,
4882 (cvt_type && modifier == WIDEN)
4883 ? cvt_type : vectype_out);
4884 vec_dsts.quick_push (vec_dest);
4886 if (multi_step_cvt)
4888 for (i = interm_types.length () - 1;
4889 interm_types.iterate (i, &intermediate_type); i--)
4891 vec_dest = vect_create_destination_var (scalar_dest,
4892 intermediate_type);
4893 vec_dsts.quick_push (vec_dest);
4897 if (cvt_type)
4898 vec_dest = vect_create_destination_var (scalar_dest,
4899 modifier == WIDEN
4900 ? vectype_out : cvt_type);
4902 if (!slp_node)
4904 if (modifier == WIDEN)
4906 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4907 if (op_type == binary_op)
4908 vec_oprnds1.create (1);
4910 else if (modifier == NARROW)
4911 vec_oprnds0.create (
4912 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4914 else if (code == WIDEN_LSHIFT_EXPR)
4915 vec_oprnds1.create (slp_node->vec_stmts_size);
4917 last_oprnd = op0;
4918 prev_stmt_info = NULL;
4919 switch (modifier)
4921 case NONE:
4922 for (j = 0; j < ncopies; j++)
4924 if (j == 0)
4925 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
4926 NULL, slp_node);
4927 else
4928 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
4930 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4932 stmt_vec_info new_stmt_info;
4933 /* Arguments are ready, create the new vector stmt. */
4934 if (code1 == CALL_EXPR)
4936 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4937 new_temp = make_ssa_name (vec_dest, new_stmt);
4938 gimple_call_set_lhs (new_stmt, new_temp);
4939 new_stmt_info
4940 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4942 else
4944 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4945 gassign *new_stmt
4946 = gimple_build_assign (vec_dest, code1, vop0);
4947 new_temp = make_ssa_name (vec_dest, new_stmt);
4948 gimple_assign_set_lhs (new_stmt, new_temp);
4949 new_stmt_info
4950 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4953 if (slp_node)
4954 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4955 else
4957 if (!prev_stmt_info)
4958 STMT_VINFO_VEC_STMT (stmt_info)
4959 = *vec_stmt = new_stmt_info;
4960 else
4961 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4962 prev_stmt_info = new_stmt_info;
4966 break;
4968 case WIDEN:
4969 /* In case the vectorization factor (VF) is bigger than the number
4970 of elements that we can fit in a vectype (nunits), we have to
4971 generate more than one vector stmt - i.e - we need to "unroll"
4972 the vector stmt by a factor VF/nunits. */
4973 for (j = 0; j < ncopies; j++)
4975 /* Handle uses. */
4976 if (j == 0)
4978 if (slp_node)
4980 if (code == WIDEN_LSHIFT_EXPR)
4982 unsigned int k;
4984 vec_oprnd1 = op1;
4985 /* Store vec_oprnd1 for every vector stmt to be created
4986 for SLP_NODE. We check during the analysis that all
4987 the shift arguments are the same. */
4988 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4989 vec_oprnds1.quick_push (vec_oprnd1);
4991 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
4992 &vec_oprnds0, NULL, slp_node);
4994 else
4995 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
4996 &vec_oprnds1, slp_node);
4998 else
5000 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5001 vec_oprnds0.quick_push (vec_oprnd0);
5002 if (op_type == binary_op)
5004 if (code == WIDEN_LSHIFT_EXPR)
5005 vec_oprnd1 = op1;
5006 else
5007 vec_oprnd1
5008 = vect_get_vec_def_for_operand (op1, stmt_info);
5009 vec_oprnds1.quick_push (vec_oprnd1);
5013 else
5015 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5016 vec_oprnds0.truncate (0);
5017 vec_oprnds0.quick_push (vec_oprnd0);
5018 if (op_type == binary_op)
5020 if (code == WIDEN_LSHIFT_EXPR)
5021 vec_oprnd1 = op1;
5022 else
5023 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5024 vec_oprnd1);
5025 vec_oprnds1.truncate (0);
5026 vec_oprnds1.quick_push (vec_oprnd1);
5030 /* Arguments are ready. Create the new vector stmts. */
5031 for (i = multi_step_cvt; i >= 0; i--)
5033 tree this_dest = vec_dsts[i];
5034 enum tree_code c1 = code1, c2 = code2;
5035 if (i == 0 && codecvt2 != ERROR_MARK)
5037 c1 = codecvt1;
5038 c2 = codecvt2;
5040 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5041 &vec_oprnds1, stmt_info,
5042 this_dest, gsi,
5043 c1, c2, decl1, decl2,
5044 op_type);
5047 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5049 stmt_vec_info new_stmt_info;
5050 if (cvt_type)
5052 if (codecvt1 == CALL_EXPR)
5054 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5055 new_temp = make_ssa_name (vec_dest, new_stmt);
5056 gimple_call_set_lhs (new_stmt, new_temp);
5057 new_stmt_info
5058 = vect_finish_stmt_generation (stmt_info, new_stmt,
5059 gsi);
5061 else
5063 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5064 new_temp = make_ssa_name (vec_dest);
5065 gassign *new_stmt
5066 = gimple_build_assign (new_temp, codecvt1, vop0);
5067 new_stmt_info
5068 = vect_finish_stmt_generation (stmt_info, new_stmt,
5069 gsi);
5072 else
5073 new_stmt_info = vinfo->lookup_def (vop0);
5075 if (slp_node)
5076 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5077 else
5079 if (!prev_stmt_info)
5080 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5081 else
5082 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5083 prev_stmt_info = new_stmt_info;
5088 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5089 break;
5091 case NARROW:
5092 /* In case the vectorization factor (VF) is bigger than the number
5093 of elements that we can fit in a vectype (nunits), we have to
5094 generate more than one vector stmt - i.e - we need to "unroll"
5095 the vector stmt by a factor VF/nunits. */
5096 for (j = 0; j < ncopies; j++)
5098 /* Handle uses. */
5099 if (slp_node)
5100 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5101 slp_node);
5102 else
5104 vec_oprnds0.truncate (0);
5105 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5106 vect_pow2 (multi_step_cvt) - 1);
5109 /* Arguments are ready. Create the new vector stmts. */
5110 if (cvt_type)
5111 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5113 if (codecvt1 == CALL_EXPR)
5115 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5116 new_temp = make_ssa_name (vec_dest, new_stmt);
5117 gimple_call_set_lhs (new_stmt, new_temp);
5118 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5120 else
5122 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5123 new_temp = make_ssa_name (vec_dest);
5124 gassign *new_stmt
5125 = gimple_build_assign (new_temp, codecvt1, vop0);
5126 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5129 vec_oprnds0[i] = new_temp;
5132 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5133 stmt_info, vec_dsts, gsi,
5134 slp_node, code1,
5135 &prev_stmt_info);
5138 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5139 break;
5142 vec_oprnds0.release ();
5143 vec_oprnds1.release ();
5144 interm_types.release ();
5146 return true;
5150 /* Function vectorizable_assignment.
5152 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5153 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5154 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5155 Return true if STMT_INFO is vectorizable in this way. */
5157 static bool
5158 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5159 stmt_vec_info *vec_stmt, slp_tree slp_node,
5160 stmt_vector_for_cost *cost_vec)
5162 tree vec_dest;
5163 tree scalar_dest;
5164 tree op;
5165 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5166 tree new_temp;
5167 enum vect_def_type dt[1] = {vect_unknown_def_type};
5168 int ndts = 1;
5169 int ncopies;
5170 int i, j;
5171 vec<tree> vec_oprnds = vNULL;
5172 tree vop;
5173 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5174 vec_info *vinfo = stmt_info->vinfo;
5175 stmt_vec_info prev_stmt_info = NULL;
5176 enum tree_code code;
5177 tree vectype_in;
5179 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5180 return false;
5182 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5183 && ! vec_stmt)
5184 return false;
5186 /* Is vectorizable assignment? */
5187 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5188 if (!stmt)
5189 return false;
5191 scalar_dest = gimple_assign_lhs (stmt);
5192 if (TREE_CODE (scalar_dest) != SSA_NAME)
5193 return false;
5195 code = gimple_assign_rhs_code (stmt);
5196 if (gimple_assign_single_p (stmt)
5197 || code == PAREN_EXPR
5198 || CONVERT_EXPR_CODE_P (code))
5199 op = gimple_assign_rhs1 (stmt);
5200 else
5201 return false;
5203 if (code == VIEW_CONVERT_EXPR)
5204 op = TREE_OPERAND (op, 0);
5206 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5207 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5209 /* Multiple types in SLP are handled by creating the appropriate number of
5210 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5211 case of SLP. */
5212 if (slp_node)
5213 ncopies = 1;
5214 else
5215 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5217 gcc_assert (ncopies >= 1);
5219 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5221 if (dump_enabled_p ())
5222 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5223 "use not simple.\n");
5224 return false;
5227 /* We can handle NOP_EXPR conversions that do not change the number
5228 of elements or the vector size. */
5229 if ((CONVERT_EXPR_CODE_P (code)
5230 || code == VIEW_CONVERT_EXPR)
5231 && (!vectype_in
5232 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5233 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5234 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5235 return false;
5237 /* We do not handle bit-precision changes. */
5238 if ((CONVERT_EXPR_CODE_P (code)
5239 || code == VIEW_CONVERT_EXPR)
5240 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5241 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5242 || !type_has_mode_precision_p (TREE_TYPE (op)))
5243 /* But a conversion that does not change the bit-pattern is ok. */
5244 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5245 > TYPE_PRECISION (TREE_TYPE (op)))
5246 && TYPE_UNSIGNED (TREE_TYPE (op)))
5247 /* Conversion between boolean types of different sizes is
5248 a simple assignment in case their vectypes are same
5249 boolean vectors. */
5250 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5251 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5253 if (dump_enabled_p ())
5254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5255 "type conversion to/from bit-precision "
5256 "unsupported.\n");
5257 return false;
5260 if (!vec_stmt) /* transformation not required. */
5262 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5263 DUMP_VECT_SCOPE ("vectorizable_assignment");
5264 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5265 return true;
5268 /* Transform. */
5269 if (dump_enabled_p ())
5270 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5272 /* Handle def. */
5273 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5275 /* Handle use. */
5276 for (j = 0; j < ncopies; j++)
5278 /* Handle uses. */
5279 if (j == 0)
5280 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5281 else
5282 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5284 /* Arguments are ready. create the new vector stmt. */
5285 stmt_vec_info new_stmt_info = NULL;
5286 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5288 if (CONVERT_EXPR_CODE_P (code)
5289 || code == VIEW_CONVERT_EXPR)
5290 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5291 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5292 new_temp = make_ssa_name (vec_dest, new_stmt);
5293 gimple_assign_set_lhs (new_stmt, new_temp);
5294 new_stmt_info
5295 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5296 if (slp_node)
5297 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5300 if (slp_node)
5301 continue;
5303 if (j == 0)
5304 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5305 else
5306 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5308 prev_stmt_info = new_stmt_info;
5311 vec_oprnds.release ();
5312 return true;
5316 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5317 either as shift by a scalar or by a vector. */
5319 bool
5320 vect_supportable_shift (enum tree_code code, tree scalar_type)
5323 machine_mode vec_mode;
5324 optab optab;
5325 int icode;
5326 tree vectype;
5328 vectype = get_vectype_for_scalar_type (scalar_type);
5329 if (!vectype)
5330 return false;
5332 optab = optab_for_tree_code (code, vectype, optab_scalar);
5333 if (!optab
5334 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5336 optab = optab_for_tree_code (code, vectype, optab_vector);
5337 if (!optab
5338 || (optab_handler (optab, TYPE_MODE (vectype))
5339 == CODE_FOR_nothing))
5340 return false;
5343 vec_mode = TYPE_MODE (vectype);
5344 icode = (int) optab_handler (optab, vec_mode);
5345 if (icode == CODE_FOR_nothing)
5346 return false;
5348 return true;
5352 /* Function vectorizable_shift.
5354 Check if STMT_INFO performs a shift operation that can be vectorized.
5355 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5356 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5357 Return true if STMT_INFO is vectorizable in this way. */
5359 bool
5360 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5361 stmt_vec_info *vec_stmt, slp_tree slp_node,
5362 stmt_vector_for_cost *cost_vec)
5364 tree vec_dest;
5365 tree scalar_dest;
5366 tree op0, op1 = NULL;
5367 tree vec_oprnd1 = NULL_TREE;
5368 tree vectype;
5369 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5370 enum tree_code code;
5371 machine_mode vec_mode;
5372 tree new_temp;
5373 optab optab;
5374 int icode;
5375 machine_mode optab_op2_mode;
5376 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5377 int ndts = 2;
5378 stmt_vec_info prev_stmt_info;
5379 poly_uint64 nunits_in;
5380 poly_uint64 nunits_out;
5381 tree vectype_out;
5382 tree op1_vectype;
5383 int ncopies;
5384 int j, i;
5385 vec<tree> vec_oprnds0 = vNULL;
5386 vec<tree> vec_oprnds1 = vNULL;
5387 tree vop0, vop1;
5388 unsigned int k;
5389 bool scalar_shift_arg = true;
5390 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5391 vec_info *vinfo = stmt_info->vinfo;
5393 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5394 return false;
5396 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5397 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5398 && ! vec_stmt)
5399 return false;
5401 /* Is STMT a vectorizable binary/unary operation? */
5402 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5403 if (!stmt)
5404 return false;
5406 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5407 return false;
5409 code = gimple_assign_rhs_code (stmt);
5411 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5412 || code == RROTATE_EXPR))
5413 return false;
5415 scalar_dest = gimple_assign_lhs (stmt);
5416 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5417 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5419 if (dump_enabled_p ())
5420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5421 "bit-precision shifts not supported.\n");
5422 return false;
5425 op0 = gimple_assign_rhs1 (stmt);
5426 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5428 if (dump_enabled_p ())
5429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5430 "use not simple.\n");
5431 return false;
5433 /* If op0 is an external or constant def use a vector type with
5434 the same size as the output vector type. */
5435 if (!vectype)
5436 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5437 if (vec_stmt)
5438 gcc_assert (vectype);
5439 if (!vectype)
5441 if (dump_enabled_p ())
5442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5443 "no vectype for scalar type\n");
5444 return false;
5447 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5448 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5449 if (maybe_ne (nunits_out, nunits_in))
5450 return false;
5452 op1 = gimple_assign_rhs2 (stmt);
5453 stmt_vec_info op1_def_stmt_info;
5454 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5455 &op1_def_stmt_info))
5457 if (dump_enabled_p ())
5458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5459 "use not simple.\n");
5460 return false;
5463 /* Multiple types in SLP are handled by creating the appropriate number of
5464 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5465 case of SLP. */
5466 if (slp_node)
5467 ncopies = 1;
5468 else
5469 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5471 gcc_assert (ncopies >= 1);
5473 /* Determine whether the shift amount is a vector, or scalar. If the
5474 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5476 if ((dt[1] == vect_internal_def
5477 || dt[1] == vect_induction_def
5478 || dt[1] == vect_nested_cycle)
5479 && !slp_node)
5480 scalar_shift_arg = false;
5481 else if (dt[1] == vect_constant_def
5482 || dt[1] == vect_external_def
5483 || dt[1] == vect_internal_def)
5485 /* In SLP, need to check whether the shift count is the same,
5486 in loops if it is a constant or invariant, it is always
5487 a scalar shift. */
5488 if (slp_node)
5490 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5491 stmt_vec_info slpstmt_info;
5493 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5495 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5496 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5497 scalar_shift_arg = false;
5501 /* If the shift amount is computed by a pattern stmt we cannot
5502 use the scalar amount directly thus give up and use a vector
5503 shift. */
5504 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5505 scalar_shift_arg = false;
5507 else
5509 if (dump_enabled_p ())
5510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5511 "operand mode requires invariant argument.\n");
5512 return false;
5515 /* Vector shifted by vector. */
5516 if (!scalar_shift_arg)
5518 optab = optab_for_tree_code (code, vectype, optab_vector);
5519 if (dump_enabled_p ())
5520 dump_printf_loc (MSG_NOTE, vect_location,
5521 "vector/vector shift/rotate found.\n");
5523 if (!op1_vectype)
5524 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5525 if (op1_vectype == NULL_TREE
5526 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5528 if (dump_enabled_p ())
5529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5530 "unusable type for last operand in"
5531 " vector/vector shift/rotate.\n");
5532 return false;
5535 /* See if the machine has a vector shifted by scalar insn and if not
5536 then see if it has a vector shifted by vector insn. */
5537 else
5539 optab = optab_for_tree_code (code, vectype, optab_scalar);
5540 if (optab
5541 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5543 if (dump_enabled_p ())
5544 dump_printf_loc (MSG_NOTE, vect_location,
5545 "vector/scalar shift/rotate found.\n");
5547 else
5549 optab = optab_for_tree_code (code, vectype, optab_vector);
5550 if (optab
5551 && (optab_handler (optab, TYPE_MODE (vectype))
5552 != CODE_FOR_nothing))
5554 scalar_shift_arg = false;
5556 if (dump_enabled_p ())
5557 dump_printf_loc (MSG_NOTE, vect_location,
5558 "vector/vector shift/rotate found.\n");
5560 /* Unlike the other binary operators, shifts/rotates have
5561 the rhs being int, instead of the same type as the lhs,
5562 so make sure the scalar is the right type if we are
5563 dealing with vectors of long long/long/short/char. */
5564 if (dt[1] == vect_constant_def)
5565 op1 = fold_convert (TREE_TYPE (vectype), op1);
5566 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5567 TREE_TYPE (op1)))
5569 if (slp_node
5570 && TYPE_MODE (TREE_TYPE (vectype))
5571 != TYPE_MODE (TREE_TYPE (op1)))
5573 if (dump_enabled_p ())
5574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5575 "unusable type for last operand in"
5576 " vector/vector shift/rotate.\n");
5577 return false;
5579 if (vec_stmt && !slp_node)
5581 op1 = fold_convert (TREE_TYPE (vectype), op1);
5582 op1 = vect_init_vector (stmt_info, op1,
5583 TREE_TYPE (vectype), NULL);
5590 /* Supportable by target? */
5591 if (!optab)
5593 if (dump_enabled_p ())
5594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5595 "no optab.\n");
5596 return false;
5598 vec_mode = TYPE_MODE (vectype);
5599 icode = (int) optab_handler (optab, vec_mode);
5600 if (icode == CODE_FOR_nothing)
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5604 "op not supported by target.\n");
5605 /* Check only during analysis. */
5606 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5607 || (!vec_stmt
5608 && !vect_worthwhile_without_simd_p (vinfo, code)))
5609 return false;
5610 if (dump_enabled_p ())
5611 dump_printf_loc (MSG_NOTE, vect_location,
5612 "proceeding using word mode.\n");
5615 /* Worthwhile without SIMD support? Check only during analysis. */
5616 if (!vec_stmt
5617 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5618 && !vect_worthwhile_without_simd_p (vinfo, code))
5620 if (dump_enabled_p ())
5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5622 "not worthwhile without SIMD support.\n");
5623 return false;
5626 if (!vec_stmt) /* transformation not required. */
5628 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5629 DUMP_VECT_SCOPE ("vectorizable_shift");
5630 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5631 return true;
5634 /* Transform. */
5636 if (dump_enabled_p ())
5637 dump_printf_loc (MSG_NOTE, vect_location,
5638 "transform binary/unary operation.\n");
5640 /* Handle def. */
5641 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5643 prev_stmt_info = NULL;
5644 for (j = 0; j < ncopies; j++)
5646 /* Handle uses. */
5647 if (j == 0)
5649 if (scalar_shift_arg)
5651 /* Vector shl and shr insn patterns can be defined with scalar
5652 operand 2 (shift operand). In this case, use constant or loop
5653 invariant op1 directly, without extending it to vector mode
5654 first. */
5655 optab_op2_mode = insn_data[icode].operand[2].mode;
5656 if (!VECTOR_MODE_P (optab_op2_mode))
5658 if (dump_enabled_p ())
5659 dump_printf_loc (MSG_NOTE, vect_location,
5660 "operand 1 using scalar mode.\n");
5661 vec_oprnd1 = op1;
5662 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5663 vec_oprnds1.quick_push (vec_oprnd1);
5664 if (slp_node)
5666 /* Store vec_oprnd1 for every vector stmt to be created
5667 for SLP_NODE. We check during the analysis that all
5668 the shift arguments are the same.
5669 TODO: Allow different constants for different vector
5670 stmts generated for an SLP instance. */
5671 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5672 vec_oprnds1.quick_push (vec_oprnd1);
5677 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5678 (a special case for certain kind of vector shifts); otherwise,
5679 operand 1 should be of a vector type (the usual case). */
5680 if (vec_oprnd1)
5681 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5682 slp_node);
5683 else
5684 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5685 slp_node);
5687 else
5688 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5690 /* Arguments are ready. Create the new vector stmt. */
5691 stmt_vec_info new_stmt_info = NULL;
5692 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5694 vop1 = vec_oprnds1[i];
5695 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5696 new_temp = make_ssa_name (vec_dest, new_stmt);
5697 gimple_assign_set_lhs (new_stmt, new_temp);
5698 new_stmt_info
5699 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5700 if (slp_node)
5701 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5704 if (slp_node)
5705 continue;
5707 if (j == 0)
5708 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5709 else
5710 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5711 prev_stmt_info = new_stmt_info;
5714 vec_oprnds0.release ();
5715 vec_oprnds1.release ();
5717 return true;
5721 /* Function vectorizable_operation.
5723 Check if STMT_INFO performs a binary, unary or ternary operation that can
5724 be vectorized.
5725 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5726 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5727 Return true if STMT_INFO is vectorizable in this way. */
5729 static bool
5730 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5731 stmt_vec_info *vec_stmt, slp_tree slp_node,
5732 stmt_vector_for_cost *cost_vec)
5734 tree vec_dest;
5735 tree scalar_dest;
5736 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5737 tree vectype;
5738 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5739 enum tree_code code, orig_code;
5740 machine_mode vec_mode;
5741 tree new_temp;
5742 int op_type;
5743 optab optab;
5744 bool target_support_p;
5745 enum vect_def_type dt[3]
5746 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5747 int ndts = 3;
5748 stmt_vec_info prev_stmt_info;
5749 poly_uint64 nunits_in;
5750 poly_uint64 nunits_out;
5751 tree vectype_out;
5752 int ncopies;
5753 int j, i;
5754 vec<tree> vec_oprnds0 = vNULL;
5755 vec<tree> vec_oprnds1 = vNULL;
5756 vec<tree> vec_oprnds2 = vNULL;
5757 tree vop0, vop1, vop2;
5758 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5759 vec_info *vinfo = stmt_info->vinfo;
5761 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5762 return false;
5764 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5765 && ! vec_stmt)
5766 return false;
5768 /* Is STMT a vectorizable binary/unary operation? */
5769 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5770 if (!stmt)
5771 return false;
5773 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5774 return false;
5776 orig_code = code = gimple_assign_rhs_code (stmt);
5778 /* For pointer addition and subtraction, we should use the normal
5779 plus and minus for the vector operation. */
5780 if (code == POINTER_PLUS_EXPR)
5781 code = PLUS_EXPR;
5782 if (code == POINTER_DIFF_EXPR)
5783 code = MINUS_EXPR;
5785 /* Support only unary or binary operations. */
5786 op_type = TREE_CODE_LENGTH (code);
5787 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5789 if (dump_enabled_p ())
5790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5791 "num. args = %d (not unary/binary/ternary op).\n",
5792 op_type);
5793 return false;
5796 scalar_dest = gimple_assign_lhs (stmt);
5797 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5799 /* Most operations cannot handle bit-precision types without extra
5800 truncations. */
5801 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5802 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5803 /* Exception are bitwise binary operations. */
5804 && code != BIT_IOR_EXPR
5805 && code != BIT_XOR_EXPR
5806 && code != BIT_AND_EXPR)
5808 if (dump_enabled_p ())
5809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5810 "bit-precision arithmetic not supported.\n");
5811 return false;
5814 op0 = gimple_assign_rhs1 (stmt);
5815 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5817 if (dump_enabled_p ())
5818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5819 "use not simple.\n");
5820 return false;
5822 /* If op0 is an external or constant def use a vector type with
5823 the same size as the output vector type. */
5824 if (!vectype)
5826 /* For boolean type we cannot determine vectype by
5827 invariant value (don't know whether it is a vector
5828 of booleans or vector of integers). We use output
5829 vectype because operations on boolean don't change
5830 type. */
5831 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5833 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5835 if (dump_enabled_p ())
5836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5837 "not supported operation on bool value.\n");
5838 return false;
5840 vectype = vectype_out;
5842 else
5843 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5845 if (vec_stmt)
5846 gcc_assert (vectype);
5847 if (!vectype)
5849 if (dump_enabled_p ())
5850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5851 "no vectype for scalar type %T\n",
5852 TREE_TYPE (op0));
5854 return false;
5857 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5858 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5859 if (maybe_ne (nunits_out, nunits_in))
5860 return false;
5862 if (op_type == binary_op || op_type == ternary_op)
5864 op1 = gimple_assign_rhs2 (stmt);
5865 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
5867 if (dump_enabled_p ())
5868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5869 "use not simple.\n");
5870 return false;
5873 if (op_type == ternary_op)
5875 op2 = gimple_assign_rhs3 (stmt);
5876 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
5878 if (dump_enabled_p ())
5879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5880 "use not simple.\n");
5881 return false;
5885 /* Multiple types in SLP are handled by creating the appropriate number of
5886 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5887 case of SLP. */
5888 if (slp_node)
5889 ncopies = 1;
5890 else
5891 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5893 gcc_assert (ncopies >= 1);
5895 /* Shifts are handled in vectorizable_shift (). */
5896 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5897 || code == RROTATE_EXPR)
5898 return false;
5900 /* Supportable by target? */
5902 vec_mode = TYPE_MODE (vectype);
5903 if (code == MULT_HIGHPART_EXPR)
5904 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5905 else
5907 optab = optab_for_tree_code (code, vectype, optab_default);
5908 if (!optab)
5910 if (dump_enabled_p ())
5911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5912 "no optab.\n");
5913 return false;
5915 target_support_p = (optab_handler (optab, vec_mode)
5916 != CODE_FOR_nothing);
5919 if (!target_support_p)
5921 if (dump_enabled_p ())
5922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5923 "op not supported by target.\n");
5924 /* Check only during analysis. */
5925 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5926 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5927 return false;
5928 if (dump_enabled_p ())
5929 dump_printf_loc (MSG_NOTE, vect_location,
5930 "proceeding using word mode.\n");
5933 /* Worthwhile without SIMD support? Check only during analysis. */
5934 if (!VECTOR_MODE_P (vec_mode)
5935 && !vec_stmt
5936 && !vect_worthwhile_without_simd_p (vinfo, code))
5938 if (dump_enabled_p ())
5939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5940 "not worthwhile without SIMD support.\n");
5941 return false;
5944 if (!vec_stmt) /* transformation not required. */
5946 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5947 DUMP_VECT_SCOPE ("vectorizable_operation");
5948 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5949 return true;
5952 /* Transform. */
5954 if (dump_enabled_p ())
5955 dump_printf_loc (MSG_NOTE, vect_location,
5956 "transform binary/unary operation.\n");
5958 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5959 vectors with unsigned elements, but the result is signed. So, we
5960 need to compute the MINUS_EXPR into vectype temporary and
5961 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5962 tree vec_cvt_dest = NULL_TREE;
5963 if (orig_code == POINTER_DIFF_EXPR)
5965 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5966 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5968 /* Handle def. */
5969 else
5970 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5972 /* In case the vectorization factor (VF) is bigger than the number
5973 of elements that we can fit in a vectype (nunits), we have to generate
5974 more than one vector stmt - i.e - we need to "unroll" the
5975 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5976 from one copy of the vector stmt to the next, in the field
5977 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5978 stages to find the correct vector defs to be used when vectorizing
5979 stmts that use the defs of the current stmt. The example below
5980 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5981 we need to create 4 vectorized stmts):
5983 before vectorization:
5984 RELATED_STMT VEC_STMT
5985 S1: x = memref - -
5986 S2: z = x + 1 - -
5988 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5989 there):
5990 RELATED_STMT VEC_STMT
5991 VS1_0: vx0 = memref0 VS1_1 -
5992 VS1_1: vx1 = memref1 VS1_2 -
5993 VS1_2: vx2 = memref2 VS1_3 -
5994 VS1_3: vx3 = memref3 - -
5995 S1: x = load - VS1_0
5996 S2: z = x + 1 - -
5998 step2: vectorize stmt S2 (done here):
5999 To vectorize stmt S2 we first need to find the relevant vector
6000 def for the first operand 'x'. This is, as usual, obtained from
6001 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6002 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6003 relevant vector def 'vx0'. Having found 'vx0' we can generate
6004 the vector stmt VS2_0, and as usual, record it in the
6005 STMT_VINFO_VEC_STMT of stmt S2.
6006 When creating the second copy (VS2_1), we obtain the relevant vector
6007 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6008 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6009 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6010 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6011 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6012 chain of stmts and pointers:
6013 RELATED_STMT VEC_STMT
6014 VS1_0: vx0 = memref0 VS1_1 -
6015 VS1_1: vx1 = memref1 VS1_2 -
6016 VS1_2: vx2 = memref2 VS1_3 -
6017 VS1_3: vx3 = memref3 - -
6018 S1: x = load - VS1_0
6019 VS2_0: vz0 = vx0 + v1 VS2_1 -
6020 VS2_1: vz1 = vx1 + v1 VS2_2 -
6021 VS2_2: vz2 = vx2 + v1 VS2_3 -
6022 VS2_3: vz3 = vx3 + v1 - -
6023 S2: z = x + 1 - VS2_0 */
6025 prev_stmt_info = NULL;
6026 for (j = 0; j < ncopies; j++)
6028 /* Handle uses. */
6029 if (j == 0)
6031 if (op_type == binary_op)
6032 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6033 slp_node);
6034 else if (op_type == ternary_op)
6036 if (slp_node)
6038 auto_vec<tree> ops(3);
6039 ops.quick_push (op0);
6040 ops.quick_push (op1);
6041 ops.quick_push (op2);
6042 auto_vec<vec<tree> > vec_defs(3);
6043 vect_get_slp_defs (ops, slp_node, &vec_defs);
6044 vec_oprnds0 = vec_defs[0];
6045 vec_oprnds1 = vec_defs[1];
6046 vec_oprnds2 = vec_defs[2];
6048 else
6050 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6051 &vec_oprnds1, NULL);
6052 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6053 NULL, NULL);
6056 else
6057 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6058 slp_node);
6060 else
6062 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6063 if (op_type == ternary_op)
6065 tree vec_oprnd = vec_oprnds2.pop ();
6066 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6067 vec_oprnd));
6071 /* Arguments are ready. Create the new vector stmt. */
6072 stmt_vec_info new_stmt_info = NULL;
6073 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6075 vop1 = ((op_type == binary_op || op_type == ternary_op)
6076 ? vec_oprnds1[i] : NULL_TREE);
6077 vop2 = ((op_type == ternary_op)
6078 ? vec_oprnds2[i] : NULL_TREE);
6079 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6080 vop0, vop1, vop2);
6081 new_temp = make_ssa_name (vec_dest, new_stmt);
6082 gimple_assign_set_lhs (new_stmt, new_temp);
6083 new_stmt_info
6084 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6085 if (vec_cvt_dest)
6087 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6088 gassign *new_stmt
6089 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6090 new_temp);
6091 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6092 gimple_assign_set_lhs (new_stmt, new_temp);
6093 new_stmt_info
6094 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6096 if (slp_node)
6097 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6100 if (slp_node)
6101 continue;
6103 if (j == 0)
6104 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6105 else
6106 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6107 prev_stmt_info = new_stmt_info;
6110 vec_oprnds0.release ();
6111 vec_oprnds1.release ();
6112 vec_oprnds2.release ();
6114 return true;
6117 /* A helper function to ensure data reference DR_INFO's base alignment. */
6119 static void
6120 ensure_base_align (dr_vec_info *dr_info)
6122 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6123 return;
6125 if (dr_info->base_misaligned)
6127 tree base_decl = dr_info->base_decl;
6129 // We should only be able to increase the alignment of a base object if
6130 // we know what its new alignment should be at compile time.
6131 unsigned HOST_WIDE_INT align_base_to =
6132 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6134 if (decl_in_symtab_p (base_decl))
6135 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6136 else
6138 SET_DECL_ALIGN (base_decl, align_base_to);
6139 DECL_USER_ALIGN (base_decl) = 1;
6141 dr_info->base_misaligned = false;
6146 /* Function get_group_alias_ptr_type.
6148 Return the alias type for the group starting at FIRST_STMT_INFO. */
6150 static tree
6151 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6153 struct data_reference *first_dr, *next_dr;
6155 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6156 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6157 while (next_stmt_info)
6159 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6160 if (get_alias_set (DR_REF (first_dr))
6161 != get_alias_set (DR_REF (next_dr)))
6163 if (dump_enabled_p ())
6164 dump_printf_loc (MSG_NOTE, vect_location,
6165 "conflicting alias set types.\n");
6166 return ptr_type_node;
6168 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6170 return reference_alias_ptr_type (DR_REF (first_dr));
6174 /* Function vectorizable_store.
6176 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
6177 that can be vectorized.
6178 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6179 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6180 Return true if STMT_INFO is vectorizable in this way. */
6182 static bool
6183 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6184 stmt_vec_info *vec_stmt, slp_tree slp_node,
6185 stmt_vector_for_cost *cost_vec)
6187 tree data_ref;
6188 tree op;
6189 tree vec_oprnd = NULL_TREE;
6190 tree elem_type;
6191 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6192 struct loop *loop = NULL;
6193 machine_mode vec_mode;
6194 tree dummy;
6195 enum dr_alignment_support alignment_support_scheme;
6196 enum vect_def_type rhs_dt = vect_unknown_def_type;
6197 enum vect_def_type mask_dt = vect_unknown_def_type;
6198 stmt_vec_info prev_stmt_info = NULL;
6199 tree dataref_ptr = NULL_TREE;
6200 tree dataref_offset = NULL_TREE;
6201 gimple *ptr_incr = NULL;
6202 int ncopies;
6203 int j;
6204 stmt_vec_info first_stmt_info;
6205 bool grouped_store;
6206 unsigned int group_size, i;
6207 vec<tree> oprnds = vNULL;
6208 vec<tree> result_chain = vNULL;
6209 tree offset = NULL_TREE;
6210 vec<tree> vec_oprnds = vNULL;
6211 bool slp = (slp_node != NULL);
6212 unsigned int vec_num;
6213 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6214 vec_info *vinfo = stmt_info->vinfo;
6215 tree aggr_type;
6216 gather_scatter_info gs_info;
6217 poly_uint64 vf;
6218 vec_load_store_type vls_type;
6219 tree ref_type;
6221 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6222 return false;
6224 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6225 && ! vec_stmt)
6226 return false;
6228 /* Is vectorizable store? */
6230 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6231 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6233 tree scalar_dest = gimple_assign_lhs (assign);
6234 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6235 && is_pattern_stmt_p (stmt_info))
6236 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6237 if (TREE_CODE (scalar_dest) != ARRAY_REF
6238 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6239 && TREE_CODE (scalar_dest) != INDIRECT_REF
6240 && TREE_CODE (scalar_dest) != COMPONENT_REF
6241 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6242 && TREE_CODE (scalar_dest) != REALPART_EXPR
6243 && TREE_CODE (scalar_dest) != MEM_REF)
6244 return false;
6246 else
6248 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
6249 if (!call || !gimple_call_internal_p (call))
6250 return false;
6252 internal_fn ifn = gimple_call_internal_fn (call);
6253 if (!internal_store_fn_p (ifn))
6254 return false;
6256 if (slp_node != NULL)
6258 if (dump_enabled_p ())
6259 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6260 "SLP of masked stores not supported.\n");
6261 return false;
6264 int mask_index = internal_fn_mask_index (ifn);
6265 if (mask_index >= 0)
6267 mask = gimple_call_arg (call, mask_index);
6268 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
6269 &mask_vectype))
6270 return false;
6274 op = vect_get_store_rhs (stmt_info);
6276 /* Cannot have hybrid store SLP -- that would mean storing to the
6277 same location twice. */
6278 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6280 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6281 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6283 if (loop_vinfo)
6285 loop = LOOP_VINFO_LOOP (loop_vinfo);
6286 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6288 else
6289 vf = 1;
6291 /* Multiple types in SLP are handled by creating the appropriate number of
6292 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6293 case of SLP. */
6294 if (slp)
6295 ncopies = 1;
6296 else
6297 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6299 gcc_assert (ncopies >= 1);
6301 /* FORNOW. This restriction should be relaxed. */
6302 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
6304 if (dump_enabled_p ())
6305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6306 "multiple types in nested loop.\n");
6307 return false;
6310 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
6311 return false;
6313 elem_type = TREE_TYPE (vectype);
6314 vec_mode = TYPE_MODE (vectype);
6316 if (!STMT_VINFO_DATA_REF (stmt_info))
6317 return false;
6319 vect_memory_access_type memory_access_type;
6320 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
6321 &memory_access_type, &gs_info))
6322 return false;
6324 if (mask)
6326 if (memory_access_type == VMAT_CONTIGUOUS)
6328 if (!VECTOR_MODE_P (vec_mode)
6329 || !can_vec_mask_load_store_p (vec_mode,
6330 TYPE_MODE (mask_vectype), false))
6331 return false;
6333 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6334 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6336 if (dump_enabled_p ())
6337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6338 "unsupported access type for masked store.\n");
6339 return false;
6342 else
6344 /* FORNOW. In some cases can vectorize even if data-type not supported
6345 (e.g. - array initialization with 0). */
6346 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6347 return false;
6350 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
6351 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6352 && memory_access_type != VMAT_GATHER_SCATTER
6353 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6354 if (grouped_store)
6356 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6357 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6358 group_size = DR_GROUP_SIZE (first_stmt_info);
6360 else
6362 first_stmt_info = stmt_info;
6363 first_dr_info = dr_info;
6364 group_size = vec_num = 1;
6367 if (!vec_stmt) /* transformation not required. */
6369 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6371 if (loop_vinfo
6372 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6373 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6374 memory_access_type, &gs_info);
6376 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6377 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6378 vls_type, slp_node, cost_vec);
6379 return true;
6381 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6383 /* Transform. */
6385 ensure_base_align (dr_info);
6387 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6389 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6390 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6391 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6392 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6393 edge pe = loop_preheader_edge (loop);
6394 gimple_seq seq;
6395 basic_block new_bb;
6396 enum { NARROW, NONE, WIDEN } modifier;
6397 poly_uint64 scatter_off_nunits
6398 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6400 if (known_eq (nunits, scatter_off_nunits))
6401 modifier = NONE;
6402 else if (known_eq (nunits * 2, scatter_off_nunits))
6404 modifier = WIDEN;
6406 /* Currently gathers and scatters are only supported for
6407 fixed-length vectors. */
6408 unsigned int count = scatter_off_nunits.to_constant ();
6409 vec_perm_builder sel (count, count, 1);
6410 for (i = 0; i < (unsigned int) count; ++i)
6411 sel.quick_push (i | (count / 2));
6413 vec_perm_indices indices (sel, 1, count);
6414 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6415 indices);
6416 gcc_assert (perm_mask != NULL_TREE);
6418 else if (known_eq (nunits, scatter_off_nunits * 2))
6420 modifier = NARROW;
6422 /* Currently gathers and scatters are only supported for
6423 fixed-length vectors. */
6424 unsigned int count = nunits.to_constant ();
6425 vec_perm_builder sel (count, count, 1);
6426 for (i = 0; i < (unsigned int) count; ++i)
6427 sel.quick_push (i | (count / 2));
6429 vec_perm_indices indices (sel, 2, count);
6430 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6431 gcc_assert (perm_mask != NULL_TREE);
6432 ncopies *= 2;
6434 else
6435 gcc_unreachable ();
6437 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6438 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6439 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6440 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6441 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6442 scaletype = TREE_VALUE (arglist);
6444 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6445 && TREE_CODE (rettype) == VOID_TYPE);
6447 ptr = fold_convert (ptrtype, gs_info.base);
6448 if (!is_gimple_min_invariant (ptr))
6450 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6451 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6452 gcc_assert (!new_bb);
6455 /* Currently we support only unconditional scatter stores,
6456 so mask should be all ones. */
6457 mask = build_int_cst (masktype, -1);
6458 mask = vect_init_vector (stmt_info, mask, masktype, NULL);
6460 scale = build_int_cst (scaletype, gs_info.scale);
6462 prev_stmt_info = NULL;
6463 for (j = 0; j < ncopies; ++j)
6465 if (j == 0)
6467 src = vec_oprnd1
6468 = vect_get_vec_def_for_operand (op, stmt_info);
6469 op = vec_oprnd0
6470 = vect_get_vec_def_for_operand (gs_info.offset, stmt_info);
6472 else if (modifier != NONE && (j & 1))
6474 if (modifier == WIDEN)
6476 src = vec_oprnd1
6477 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
6478 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6479 stmt_info, gsi);
6481 else if (modifier == NARROW)
6483 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6484 stmt_info, gsi);
6485 op = vec_oprnd0
6486 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
6488 else
6489 gcc_unreachable ();
6491 else
6493 src = vec_oprnd1
6494 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
6495 op = vec_oprnd0
6496 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
6499 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6501 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6502 TYPE_VECTOR_SUBPARTS (srctype)));
6503 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6504 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6505 gassign *new_stmt
6506 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6507 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6508 src = var;
6511 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6513 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6514 TYPE_VECTOR_SUBPARTS (idxtype)));
6515 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6516 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6517 gassign *new_stmt
6518 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6519 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6520 op = var;
6523 gcall *new_stmt
6524 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6525 stmt_vec_info new_stmt_info
6526 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6528 if (prev_stmt_info == NULL)
6529 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6530 else
6531 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6532 prev_stmt_info = new_stmt_info;
6534 return true;
6537 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6538 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
6540 if (grouped_store)
6542 /* FORNOW */
6543 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
6545 /* We vectorize all the stmts of the interleaving group when we
6546 reach the last stmt in the group. */
6547 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6548 < DR_GROUP_SIZE (first_stmt_info)
6549 && !slp)
6551 *vec_stmt = NULL;
6552 return true;
6555 if (slp)
6557 grouped_store = false;
6558 /* VEC_NUM is the number of vect stmts to be created for this
6559 group. */
6560 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6561 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6562 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6563 == first_stmt_info);
6564 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6565 op = vect_get_store_rhs (first_stmt_info);
6567 else
6568 /* VEC_NUM is the number of vect stmts to be created for this
6569 group. */
6570 vec_num = group_size;
6572 ref_type = get_group_alias_ptr_type (first_stmt_info);
6574 else
6575 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
6577 if (dump_enabled_p ())
6578 dump_printf_loc (MSG_NOTE, vect_location,
6579 "transform store. ncopies = %d\n", ncopies);
6581 if (memory_access_type == VMAT_ELEMENTWISE
6582 || memory_access_type == VMAT_STRIDED_SLP)
6584 gimple_stmt_iterator incr_gsi;
6585 bool insert_after;
6586 gimple *incr;
6587 tree offvar;
6588 tree ivstep;
6589 tree running_off;
6590 tree stride_base, stride_step, alias_off;
6591 tree vec_oprnd;
6592 unsigned int g;
6593 /* Checked by get_load_store_type. */
6594 unsigned int const_nunits = nunits.to_constant ();
6596 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6597 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
6599 stride_base
6600 = fold_build_pointer_plus
6601 (DR_BASE_ADDRESS (first_dr_info->dr),
6602 size_binop (PLUS_EXPR,
6603 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
6604 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
6605 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
6607 /* For a store with loop-invariant (but other than power-of-2)
6608 stride (i.e. not a grouped access) like so:
6610 for (i = 0; i < n; i += stride)
6611 array[i] = ...;
6613 we generate a new induction variable and new stores from
6614 the components of the (vectorized) rhs:
6616 for (j = 0; ; j += VF*stride)
6617 vectemp = ...;
6618 tmp1 = vectemp[0];
6619 array[j] = tmp1;
6620 tmp2 = vectemp[1];
6621 array[j + stride] = tmp2;
6625 unsigned nstores = const_nunits;
6626 unsigned lnel = 1;
6627 tree ltype = elem_type;
6628 tree lvectype = vectype;
6629 if (slp)
6631 if (group_size < const_nunits
6632 && const_nunits % group_size == 0)
6634 nstores = const_nunits / group_size;
6635 lnel = group_size;
6636 ltype = build_vector_type (elem_type, group_size);
6637 lvectype = vectype;
6639 /* First check if vec_extract optab doesn't support extraction
6640 of vector elts directly. */
6641 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6642 machine_mode vmode;
6643 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6644 || !VECTOR_MODE_P (vmode)
6645 || !targetm.vector_mode_supported_p (vmode)
6646 || (convert_optab_handler (vec_extract_optab,
6647 TYPE_MODE (vectype), vmode)
6648 == CODE_FOR_nothing))
6650 /* Try to avoid emitting an extract of vector elements
6651 by performing the extracts using an integer type of the
6652 same size, extracting from a vector of those and then
6653 re-interpreting it as the original vector type if
6654 supported. */
6655 unsigned lsize
6656 = group_size * GET_MODE_BITSIZE (elmode);
6657 unsigned int lnunits = const_nunits / group_size;
6658 /* If we can't construct such a vector fall back to
6659 element extracts from the original vector type and
6660 element size stores. */
6661 if (int_mode_for_size (lsize, 0).exists (&elmode)
6662 && mode_for_vector (elmode, lnunits).exists (&vmode)
6663 && VECTOR_MODE_P (vmode)
6664 && targetm.vector_mode_supported_p (vmode)
6665 && (convert_optab_handler (vec_extract_optab,
6666 vmode, elmode)
6667 != CODE_FOR_nothing))
6669 nstores = lnunits;
6670 lnel = group_size;
6671 ltype = build_nonstandard_integer_type (lsize, 1);
6672 lvectype = build_vector_type (ltype, nstores);
6674 /* Else fall back to vector extraction anyway.
6675 Fewer stores are more important than avoiding spilling
6676 of the vector we extract from. Compared to the
6677 construction case in vectorizable_load no store-forwarding
6678 issue exists here for reasonable archs. */
6681 else if (group_size >= const_nunits
6682 && group_size % const_nunits == 0)
6684 nstores = 1;
6685 lnel = const_nunits;
6686 ltype = vectype;
6687 lvectype = vectype;
6689 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6690 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6693 ivstep = stride_step;
6694 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6695 build_int_cst (TREE_TYPE (ivstep), vf));
6697 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6699 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6700 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6701 create_iv (stride_base, ivstep, NULL,
6702 loop, &incr_gsi, insert_after,
6703 &offvar, NULL);
6704 incr = gsi_stmt (incr_gsi);
6705 loop_vinfo->add_stmt (incr);
6707 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6709 prev_stmt_info = NULL;
6710 alias_off = build_int_cst (ref_type, 0);
6711 stmt_vec_info next_stmt_info = first_stmt_info;
6712 for (g = 0; g < group_size; g++)
6714 running_off = offvar;
6715 if (g)
6717 tree size = TYPE_SIZE_UNIT (ltype);
6718 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6719 size);
6720 tree newoff = copy_ssa_name (running_off, NULL);
6721 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6722 running_off, pos);
6723 vect_finish_stmt_generation (stmt_info, incr, gsi);
6724 running_off = newoff;
6726 unsigned int group_el = 0;
6727 unsigned HOST_WIDE_INT
6728 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6729 for (j = 0; j < ncopies; j++)
6731 /* We've set op and dt above, from vect_get_store_rhs,
6732 and first_stmt_info == stmt_info. */
6733 if (j == 0)
6735 if (slp)
6737 vect_get_vec_defs (op, NULL_TREE, stmt_info,
6738 &vec_oprnds, NULL, slp_node);
6739 vec_oprnd = vec_oprnds[0];
6741 else
6743 op = vect_get_store_rhs (next_stmt_info);
6744 vec_oprnd = vect_get_vec_def_for_operand
6745 (op, next_stmt_info);
6748 else
6750 if (slp)
6751 vec_oprnd = vec_oprnds[j];
6752 else
6753 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
6754 vec_oprnd);
6756 /* Pun the vector to extract from if necessary. */
6757 if (lvectype != vectype)
6759 tree tem = make_ssa_name (lvectype);
6760 gimple *pun
6761 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6762 lvectype, vec_oprnd));
6763 vect_finish_stmt_generation (stmt_info, pun, gsi);
6764 vec_oprnd = tem;
6766 for (i = 0; i < nstores; i++)
6768 tree newref, newoff;
6769 gimple *incr, *assign;
6770 tree size = TYPE_SIZE (ltype);
6771 /* Extract the i'th component. */
6772 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6773 bitsize_int (i), size);
6774 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6775 size, pos);
6777 elem = force_gimple_operand_gsi (gsi, elem, true,
6778 NULL_TREE, true,
6779 GSI_SAME_STMT);
6781 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6782 group_el * elsz);
6783 newref = build2 (MEM_REF, ltype,
6784 running_off, this_off);
6785 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
6787 /* And store it to *running_off. */
6788 assign = gimple_build_assign (newref, elem);
6789 stmt_vec_info assign_info
6790 = vect_finish_stmt_generation (stmt_info, assign, gsi);
6792 group_el += lnel;
6793 if (! slp
6794 || group_el == group_size)
6796 newoff = copy_ssa_name (running_off, NULL);
6797 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6798 running_off, stride_step);
6799 vect_finish_stmt_generation (stmt_info, incr, gsi);
6801 running_off = newoff;
6802 group_el = 0;
6804 if (g == group_size - 1
6805 && !slp)
6807 if (j == 0 && i == 0)
6808 STMT_VINFO_VEC_STMT (stmt_info)
6809 = *vec_stmt = assign_info;
6810 else
6811 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6812 prev_stmt_info = assign_info;
6816 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6817 if (slp)
6818 break;
6821 vec_oprnds.release ();
6822 return true;
6825 auto_vec<tree> dr_chain (group_size);
6826 oprnds.create (group_size);
6828 alignment_support_scheme
6829 = vect_supportable_dr_alignment (first_dr_info, false);
6830 gcc_assert (alignment_support_scheme);
6831 vec_loop_masks *loop_masks
6832 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6833 ? &LOOP_VINFO_MASKS (loop_vinfo)
6834 : NULL);
6835 /* Targets with store-lane instructions must not require explicit
6836 realignment. vect_supportable_dr_alignment always returns either
6837 dr_aligned or dr_unaligned_supported for masked operations. */
6838 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6839 && !mask
6840 && !loop_masks)
6841 || alignment_support_scheme == dr_aligned
6842 || alignment_support_scheme == dr_unaligned_supported);
6844 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6845 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6846 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6848 tree bump;
6849 tree vec_offset = NULL_TREE;
6850 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6852 aggr_type = NULL_TREE;
6853 bump = NULL_TREE;
6855 else if (memory_access_type == VMAT_GATHER_SCATTER)
6857 aggr_type = elem_type;
6858 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
6859 &bump, &vec_offset);
6861 else
6863 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6864 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6865 else
6866 aggr_type = vectype;
6867 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
6868 memory_access_type);
6871 if (mask)
6872 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6874 /* In case the vectorization factor (VF) is bigger than the number
6875 of elements that we can fit in a vectype (nunits), we have to generate
6876 more than one vector stmt - i.e - we need to "unroll" the
6877 vector stmt by a factor VF/nunits. For more details see documentation in
6878 vect_get_vec_def_for_copy_stmt. */
6880 /* In case of interleaving (non-unit grouped access):
6882 S1: &base + 2 = x2
6883 S2: &base = x0
6884 S3: &base + 1 = x1
6885 S4: &base + 3 = x3
6887 We create vectorized stores starting from base address (the access of the
6888 first stmt in the chain (S2 in the above example), when the last store stmt
6889 of the chain (S4) is reached:
6891 VS1: &base = vx2
6892 VS2: &base + vec_size*1 = vx0
6893 VS3: &base + vec_size*2 = vx1
6894 VS4: &base + vec_size*3 = vx3
6896 Then permutation statements are generated:
6898 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6899 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6902 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6903 (the order of the data-refs in the output of vect_permute_store_chain
6904 corresponds to the order of scalar stmts in the interleaving chain - see
6905 the documentation of vect_permute_store_chain()).
6907 In case of both multiple types and interleaving, above vector stores and
6908 permutation stmts are created for every copy. The result vector stmts are
6909 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6910 STMT_VINFO_RELATED_STMT for the next copies.
6913 prev_stmt_info = NULL;
6914 tree vec_mask = NULL_TREE;
6915 for (j = 0; j < ncopies; j++)
6917 stmt_vec_info new_stmt_info;
6918 if (j == 0)
6920 if (slp)
6922 /* Get vectorized arguments for SLP_NODE. */
6923 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
6924 NULL, slp_node);
6926 vec_oprnd = vec_oprnds[0];
6928 else
6930 /* For interleaved stores we collect vectorized defs for all the
6931 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6932 used as an input to vect_permute_store_chain(), and OPRNDS as
6933 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6935 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6936 OPRNDS are of size 1. */
6937 stmt_vec_info next_stmt_info = first_stmt_info;
6938 for (i = 0; i < group_size; i++)
6940 /* Since gaps are not supported for interleaved stores,
6941 DR_GROUP_SIZE is the exact number of stmts in the chain.
6942 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
6943 that there is no interleaving, DR_GROUP_SIZE is 1,
6944 and only one iteration of the loop will be executed. */
6945 op = vect_get_store_rhs (next_stmt_info);
6946 vec_oprnd = vect_get_vec_def_for_operand
6947 (op, next_stmt_info);
6948 dr_chain.quick_push (vec_oprnd);
6949 oprnds.quick_push (vec_oprnd);
6950 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6952 if (mask)
6953 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
6954 mask_vectype);
6957 /* We should have catched mismatched types earlier. */
6958 gcc_assert (useless_type_conversion_p (vectype,
6959 TREE_TYPE (vec_oprnd)));
6960 bool simd_lane_access_p
6961 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6962 if (simd_lane_access_p
6963 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
6964 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
6965 && integer_zerop (DR_OFFSET (first_dr_info->dr))
6966 && integer_zerop (DR_INIT (first_dr_info->dr))
6967 && alias_sets_conflict_p (get_alias_set (aggr_type),
6968 get_alias_set (TREE_TYPE (ref_type))))
6970 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
6971 dataref_offset = build_int_cst (ref_type, 0);
6973 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6974 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
6975 &dataref_ptr, &vec_offset);
6976 else
6977 dataref_ptr
6978 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
6979 simd_lane_access_p ? loop : NULL,
6980 offset, &dummy, gsi, &ptr_incr,
6981 simd_lane_access_p, NULL_TREE, bump);
6983 else
6985 /* For interleaved stores we created vectorized defs for all the
6986 defs stored in OPRNDS in the previous iteration (previous copy).
6987 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6988 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6989 next copy.
6990 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6991 OPRNDS are of size 1. */
6992 for (i = 0; i < group_size; i++)
6994 op = oprnds[i];
6995 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
6996 dr_chain[i] = vec_oprnd;
6997 oprnds[i] = vec_oprnd;
6999 if (mask)
7000 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
7001 if (dataref_offset)
7002 dataref_offset
7003 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7004 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7005 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
7006 else
7007 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7008 stmt_info, bump);
7011 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7013 tree vec_array;
7015 /* Get an array into which we can store the individual vectors. */
7016 vec_array = create_vector_array (vectype, vec_num);
7018 /* Invalidate the current contents of VEC_ARRAY. This should
7019 become an RTL clobber too, which prevents the vector registers
7020 from being upward-exposed. */
7021 vect_clobber_variable (stmt_info, gsi, vec_array);
7023 /* Store the individual vectors into the array. */
7024 for (i = 0; i < vec_num; i++)
7026 vec_oprnd = dr_chain[i];
7027 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
7030 tree final_mask = NULL;
7031 if (loop_masks)
7032 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7033 vectype, j);
7034 if (vec_mask)
7035 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7036 vec_mask, gsi);
7038 gcall *call;
7039 if (final_mask)
7041 /* Emit:
7042 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7043 VEC_ARRAY). */
7044 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7045 tree alias_ptr = build_int_cst (ref_type, align);
7046 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7047 dataref_ptr, alias_ptr,
7048 final_mask, vec_array);
7050 else
7052 /* Emit:
7053 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7054 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7055 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7056 vec_array);
7057 gimple_call_set_lhs (call, data_ref);
7059 gimple_call_set_nothrow (call, true);
7060 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
7062 /* Record that VEC_ARRAY is now dead. */
7063 vect_clobber_variable (stmt_info, gsi, vec_array);
7065 else
7067 new_stmt_info = NULL;
7068 if (grouped_store)
7070 if (j == 0)
7071 result_chain.create (group_size);
7072 /* Permute. */
7073 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
7074 &result_chain);
7077 stmt_vec_info next_stmt_info = first_stmt_info;
7078 for (i = 0; i < vec_num; i++)
7080 unsigned misalign;
7081 unsigned HOST_WIDE_INT align;
7083 tree final_mask = NULL_TREE;
7084 if (loop_masks)
7085 final_mask = vect_get_loop_mask (gsi, loop_masks,
7086 vec_num * ncopies,
7087 vectype, vec_num * j + i);
7088 if (vec_mask)
7089 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7090 vec_mask, gsi);
7092 if (memory_access_type == VMAT_GATHER_SCATTER)
7094 tree scale = size_int (gs_info.scale);
7095 gcall *call;
7096 if (loop_masks)
7097 call = gimple_build_call_internal
7098 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7099 scale, vec_oprnd, final_mask);
7100 else
7101 call = gimple_build_call_internal
7102 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7103 scale, vec_oprnd);
7104 gimple_call_set_nothrow (call, true);
7105 new_stmt_info
7106 = vect_finish_stmt_generation (stmt_info, call, gsi);
7107 break;
7110 if (i > 0)
7111 /* Bump the vector pointer. */
7112 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7113 stmt_info, bump);
7115 if (slp)
7116 vec_oprnd = vec_oprnds[i];
7117 else if (grouped_store)
7118 /* For grouped stores vectorized defs are interleaved in
7119 vect_permute_store_chain(). */
7120 vec_oprnd = result_chain[i];
7122 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
7123 if (aligned_access_p (first_dr_info))
7124 misalign = 0;
7125 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7127 align = dr_alignment (vect_dr_behavior (first_dr_info));
7128 misalign = 0;
7130 else
7131 misalign = DR_MISALIGNMENT (first_dr_info);
7132 if (dataref_offset == NULL_TREE
7133 && TREE_CODE (dataref_ptr) == SSA_NAME)
7134 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7135 misalign);
7137 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7139 tree perm_mask = perm_mask_for_reverse (vectype);
7140 tree perm_dest = vect_create_destination_var
7141 (vect_get_store_rhs (stmt_info), vectype);
7142 tree new_temp = make_ssa_name (perm_dest);
7144 /* Generate the permute statement. */
7145 gimple *perm_stmt
7146 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7147 vec_oprnd, perm_mask);
7148 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7150 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7151 vec_oprnd = new_temp;
7154 /* Arguments are ready. Create the new vector stmt. */
7155 if (final_mask)
7157 align = least_bit_hwi (misalign | align);
7158 tree ptr = build_int_cst (ref_type, align);
7159 gcall *call
7160 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7161 dataref_ptr, ptr,
7162 final_mask, vec_oprnd);
7163 gimple_call_set_nothrow (call, true);
7164 new_stmt_info
7165 = vect_finish_stmt_generation (stmt_info, call, gsi);
7167 else
7169 data_ref = fold_build2 (MEM_REF, vectype,
7170 dataref_ptr,
7171 dataref_offset
7172 ? dataref_offset
7173 : build_int_cst (ref_type, 0));
7174 if (aligned_access_p (first_dr_info))
7176 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7177 TREE_TYPE (data_ref)
7178 = build_aligned_type (TREE_TYPE (data_ref),
7179 align * BITS_PER_UNIT);
7180 else
7181 TREE_TYPE (data_ref)
7182 = build_aligned_type (TREE_TYPE (data_ref),
7183 TYPE_ALIGN (elem_type));
7184 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7185 gassign *new_stmt
7186 = gimple_build_assign (data_ref, vec_oprnd);
7187 new_stmt_info
7188 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7191 if (slp)
7192 continue;
7194 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7195 if (!next_stmt_info)
7196 break;
7199 if (!slp)
7201 if (j == 0)
7202 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7203 else
7204 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7205 prev_stmt_info = new_stmt_info;
7209 oprnds.release ();
7210 result_chain.release ();
7211 vec_oprnds.release ();
7213 return true;
7216 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7217 VECTOR_CST mask. No checks are made that the target platform supports the
7218 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7219 vect_gen_perm_mask_checked. */
7221 tree
7222 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7224 tree mask_type;
7226 poly_uint64 nunits = sel.length ();
7227 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7229 mask_type = build_vector_type (ssizetype, nunits);
7230 return vec_perm_indices_to_tree (mask_type, sel);
7233 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7234 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7236 tree
7237 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7239 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7240 return vect_gen_perm_mask_any (vectype, sel);
7243 /* Given a vector variable X and Y, that was generated for the scalar
7244 STMT_INFO, generate instructions to permute the vector elements of X and Y
7245 using permutation mask MASK_VEC, insert them at *GSI and return the
7246 permuted vector variable. */
7248 static tree
7249 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
7250 gimple_stmt_iterator *gsi)
7252 tree vectype = TREE_TYPE (x);
7253 tree perm_dest, data_ref;
7254 gimple *perm_stmt;
7256 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
7257 if (TREE_CODE (scalar_dest) == SSA_NAME)
7258 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7259 else
7260 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7261 data_ref = make_ssa_name (perm_dest);
7263 /* Generate the permute statement. */
7264 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7265 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7267 return data_ref;
7270 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7271 inserting them on the loops preheader edge. Returns true if we
7272 were successful in doing so (and thus STMT_INFO can be moved then),
7273 otherwise returns false. */
7275 static bool
7276 hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
7278 ssa_op_iter i;
7279 tree op;
7280 bool any = false;
7282 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7284 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7285 if (!gimple_nop_p (def_stmt)
7286 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7288 /* Make sure we don't need to recurse. While we could do
7289 so in simple cases when there are more complex use webs
7290 we don't have an easy way to preserve stmt order to fulfil
7291 dependencies within them. */
7292 tree op2;
7293 ssa_op_iter i2;
7294 if (gimple_code (def_stmt) == GIMPLE_PHI)
7295 return false;
7296 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7298 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7299 if (!gimple_nop_p (def_stmt2)
7300 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7301 return false;
7303 any = true;
7307 if (!any)
7308 return true;
7310 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7312 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7313 if (!gimple_nop_p (def_stmt)
7314 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7316 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7317 gsi_remove (&gsi, false);
7318 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7322 return true;
7325 /* vectorizable_load.
7327 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
7328 that can be vectorized.
7329 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7330 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7331 Return true if STMT_INFO is vectorizable in this way. */
7333 static bool
7334 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7335 stmt_vec_info *vec_stmt, slp_tree slp_node,
7336 slp_instance slp_node_instance,
7337 stmt_vector_for_cost *cost_vec)
7339 tree scalar_dest;
7340 tree vec_dest = NULL;
7341 tree data_ref = NULL;
7342 stmt_vec_info prev_stmt_info;
7343 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7344 struct loop *loop = NULL;
7345 struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
7346 bool nested_in_vect_loop = false;
7347 tree elem_type;
7348 tree new_temp;
7349 machine_mode mode;
7350 tree dummy;
7351 enum dr_alignment_support alignment_support_scheme;
7352 tree dataref_ptr = NULL_TREE;
7353 tree dataref_offset = NULL_TREE;
7354 gimple *ptr_incr = NULL;
7355 int ncopies;
7356 int i, j;
7357 unsigned int group_size;
7358 poly_uint64 group_gap_adj;
7359 tree msq = NULL_TREE, lsq;
7360 tree offset = NULL_TREE;
7361 tree byte_offset = NULL_TREE;
7362 tree realignment_token = NULL_TREE;
7363 gphi *phi = NULL;
7364 vec<tree> dr_chain = vNULL;
7365 bool grouped_load = false;
7366 stmt_vec_info first_stmt_info;
7367 stmt_vec_info first_stmt_info_for_drptr = NULL;
7368 bool compute_in_loop = false;
7369 struct loop *at_loop;
7370 int vec_num;
7371 bool slp = (slp_node != NULL);
7372 bool slp_perm = false;
7373 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7374 poly_uint64 vf;
7375 tree aggr_type;
7376 gather_scatter_info gs_info;
7377 vec_info *vinfo = stmt_info->vinfo;
7378 tree ref_type;
7379 enum vect_def_type mask_dt = vect_unknown_def_type;
7381 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7382 return false;
7384 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7385 && ! vec_stmt)
7386 return false;
7388 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7389 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7391 scalar_dest = gimple_assign_lhs (assign);
7392 if (TREE_CODE (scalar_dest) != SSA_NAME)
7393 return false;
7395 tree_code code = gimple_assign_rhs_code (assign);
7396 if (code != ARRAY_REF
7397 && code != BIT_FIELD_REF
7398 && code != INDIRECT_REF
7399 && code != COMPONENT_REF
7400 && code != IMAGPART_EXPR
7401 && code != REALPART_EXPR
7402 && code != MEM_REF
7403 && TREE_CODE_CLASS (code) != tcc_declaration)
7404 return false;
7406 else
7408 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7409 if (!call || !gimple_call_internal_p (call))
7410 return false;
7412 internal_fn ifn = gimple_call_internal_fn (call);
7413 if (!internal_load_fn_p (ifn))
7414 return false;
7416 scalar_dest = gimple_call_lhs (call);
7417 if (!scalar_dest)
7418 return false;
7420 if (slp_node != NULL)
7422 if (dump_enabled_p ())
7423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7424 "SLP of masked loads not supported.\n");
7425 return false;
7428 int mask_index = internal_fn_mask_index (ifn);
7429 if (mask_index >= 0)
7431 mask = gimple_call_arg (call, mask_index);
7432 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7433 &mask_vectype))
7434 return false;
7438 if (!STMT_VINFO_DATA_REF (stmt_info))
7439 return false;
7441 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7442 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7444 if (loop_vinfo)
7446 loop = LOOP_VINFO_LOOP (loop_vinfo);
7447 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
7448 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7450 else
7451 vf = 1;
7453 /* Multiple types in SLP are handled by creating the appropriate number of
7454 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7455 case of SLP. */
7456 if (slp)
7457 ncopies = 1;
7458 else
7459 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7461 gcc_assert (ncopies >= 1);
7463 /* FORNOW. This restriction should be relaxed. */
7464 if (nested_in_vect_loop && ncopies > 1)
7466 if (dump_enabled_p ())
7467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7468 "multiple types in nested loop.\n");
7469 return false;
7472 /* Invalidate assumptions made by dependence analysis when vectorization
7473 on the unrolled body effectively re-orders stmts. */
7474 if (ncopies > 1
7475 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7476 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7477 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7479 if (dump_enabled_p ())
7480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7481 "cannot perform implicit CSE when unrolling "
7482 "with negative dependence distance\n");
7483 return false;
7486 elem_type = TREE_TYPE (vectype);
7487 mode = TYPE_MODE (vectype);
7489 /* FORNOW. In some cases can vectorize even if data-type not supported
7490 (e.g. - data copies). */
7491 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7493 if (dump_enabled_p ())
7494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7495 "Aligned load, but unsupported type.\n");
7496 return false;
7499 /* Check if the load is a part of an interleaving chain. */
7500 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7502 grouped_load = true;
7503 /* FORNOW */
7504 gcc_assert (!nested_in_vect_loop);
7505 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7507 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7508 group_size = DR_GROUP_SIZE (first_stmt_info);
7510 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7511 slp_perm = true;
7513 /* Invalidate assumptions made by dependence analysis when vectorization
7514 on the unrolled body effectively re-orders stmts. */
7515 if (!PURE_SLP_STMT (stmt_info)
7516 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7517 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7518 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7520 if (dump_enabled_p ())
7521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7522 "cannot perform implicit CSE when performing "
7523 "group loads with negative dependence distance\n");
7524 return false;
7527 /* Similarly when the stmt is a load that is both part of a SLP
7528 instance and a loop vectorized stmt via the same-dr mechanism
7529 we have to give up. */
7530 if (DR_GROUP_SAME_DR_STMT (stmt_info)
7531 && (STMT_SLP_TYPE (stmt_info)
7532 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info))))
7534 if (dump_enabled_p ())
7535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7536 "conflicting SLP types for CSEd load\n");
7537 return false;
7540 else
7541 group_size = 1;
7543 vect_memory_access_type memory_access_type;
7544 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
7545 &memory_access_type, &gs_info))
7546 return false;
7548 if (mask)
7550 if (memory_access_type == VMAT_CONTIGUOUS)
7552 machine_mode vec_mode = TYPE_MODE (vectype);
7553 if (!VECTOR_MODE_P (vec_mode)
7554 || !can_vec_mask_load_store_p (vec_mode,
7555 TYPE_MODE (mask_vectype), true))
7556 return false;
7558 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7560 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7561 tree masktype
7562 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7563 if (TREE_CODE (masktype) == INTEGER_TYPE)
7565 if (dump_enabled_p ())
7566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7567 "masked gather with integer mask not"
7568 " supported.");
7569 return false;
7572 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7573 && memory_access_type != VMAT_GATHER_SCATTER)
7575 if (dump_enabled_p ())
7576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7577 "unsupported access type for masked load.\n");
7578 return false;
7582 if (!vec_stmt) /* transformation not required. */
7584 if (!slp)
7585 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7587 if (loop_vinfo
7588 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7589 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7590 memory_access_type, &gs_info);
7592 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7593 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7594 slp_node_instance, slp_node, cost_vec);
7595 return true;
7598 if (!slp)
7599 gcc_assert (memory_access_type
7600 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7602 if (dump_enabled_p ())
7603 dump_printf_loc (MSG_NOTE, vect_location,
7604 "transform load. ncopies = %d\n", ncopies);
7606 /* Transform. */
7608 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7609 ensure_base_align (dr_info);
7611 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7613 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
7614 return true;
7617 if (memory_access_type == VMAT_INVARIANT)
7619 gcc_assert (!grouped_load && !mask && !bb_vinfo);
7620 /* If we have versioned for aliasing or the loop doesn't
7621 have any data dependencies that would preclude this,
7622 then we are sure this is a loop invariant load and
7623 thus we can insert it on the preheader edge. */
7624 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7625 && !nested_in_vect_loop
7626 && hoist_defs_of_uses (stmt_info, loop));
7627 if (hoist_p)
7629 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
7630 if (dump_enabled_p ())
7631 dump_printf_loc (MSG_NOTE, vect_location,
7632 "hoisting out of the vectorized loop: %G", stmt);
7633 scalar_dest = copy_ssa_name (scalar_dest);
7634 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
7635 gsi_insert_on_edge_immediate
7636 (loop_preheader_edge (loop),
7637 gimple_build_assign (scalar_dest, rhs));
7639 /* These copies are all equivalent, but currently the representation
7640 requires a separate STMT_VINFO_VEC_STMT for each one. */
7641 prev_stmt_info = NULL;
7642 gimple_stmt_iterator gsi2 = *gsi;
7643 gsi_next (&gsi2);
7644 for (j = 0; j < ncopies; j++)
7646 stmt_vec_info new_stmt_info;
7647 if (hoist_p)
7649 new_temp = vect_init_vector (stmt_info, scalar_dest,
7650 vectype, NULL);
7651 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
7652 new_stmt_info = vinfo->add_stmt (new_stmt);
7654 else
7656 new_temp = vect_init_vector (stmt_info, scalar_dest,
7657 vectype, &gsi2);
7658 new_stmt_info = vinfo->lookup_def (new_temp);
7660 if (slp)
7661 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7662 else if (j == 0)
7663 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7664 else
7665 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7666 prev_stmt_info = new_stmt_info;
7668 return true;
7671 if (memory_access_type == VMAT_ELEMENTWISE
7672 || memory_access_type == VMAT_STRIDED_SLP)
7674 gimple_stmt_iterator incr_gsi;
7675 bool insert_after;
7676 gimple *incr;
7677 tree offvar;
7678 tree ivstep;
7679 tree running_off;
7680 vec<constructor_elt, va_gc> *v = NULL;
7681 tree stride_base, stride_step, alias_off;
7682 /* Checked by get_load_store_type. */
7683 unsigned int const_nunits = nunits.to_constant ();
7684 unsigned HOST_WIDE_INT cst_offset = 0;
7686 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7687 gcc_assert (!nested_in_vect_loop);
7689 if (grouped_load)
7691 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7692 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7694 else
7696 first_stmt_info = stmt_info;
7697 first_dr_info = dr_info;
7699 if (slp && grouped_load)
7701 group_size = DR_GROUP_SIZE (first_stmt_info);
7702 ref_type = get_group_alias_ptr_type (first_stmt_info);
7704 else
7706 if (grouped_load)
7707 cst_offset
7708 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7709 * vect_get_place_in_interleaving_chain (stmt_info,
7710 first_stmt_info));
7711 group_size = 1;
7712 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7715 stride_base
7716 = fold_build_pointer_plus
7717 (DR_BASE_ADDRESS (first_dr_info->dr),
7718 size_binop (PLUS_EXPR,
7719 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7720 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7721 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7723 /* For a load with loop-invariant (but other than power-of-2)
7724 stride (i.e. not a grouped access) like so:
7726 for (i = 0; i < n; i += stride)
7727 ... = array[i];
7729 we generate a new induction variable and new accesses to
7730 form a new vector (or vectors, depending on ncopies):
7732 for (j = 0; ; j += VF*stride)
7733 tmp1 = array[j];
7734 tmp2 = array[j + stride];
7736 vectemp = {tmp1, tmp2, ...}
7739 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7740 build_int_cst (TREE_TYPE (stride_step), vf));
7742 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7744 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7745 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7746 create_iv (stride_base, ivstep, NULL,
7747 loop, &incr_gsi, insert_after,
7748 &offvar, NULL);
7749 incr = gsi_stmt (incr_gsi);
7750 loop_vinfo->add_stmt (incr);
7752 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7754 prev_stmt_info = NULL;
7755 running_off = offvar;
7756 alias_off = build_int_cst (ref_type, 0);
7757 int nloads = const_nunits;
7758 int lnel = 1;
7759 tree ltype = TREE_TYPE (vectype);
7760 tree lvectype = vectype;
7761 auto_vec<tree> dr_chain;
7762 if (memory_access_type == VMAT_STRIDED_SLP)
7764 if (group_size < const_nunits)
7766 /* First check if vec_init optab supports construction from
7767 vector elts directly. */
7768 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7769 machine_mode vmode;
7770 if (mode_for_vector (elmode, group_size).exists (&vmode)
7771 && VECTOR_MODE_P (vmode)
7772 && targetm.vector_mode_supported_p (vmode)
7773 && (convert_optab_handler (vec_init_optab,
7774 TYPE_MODE (vectype), vmode)
7775 != CODE_FOR_nothing))
7777 nloads = const_nunits / group_size;
7778 lnel = group_size;
7779 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7781 else
7783 /* Otherwise avoid emitting a constructor of vector elements
7784 by performing the loads using an integer type of the same
7785 size, constructing a vector of those and then
7786 re-interpreting it as the original vector type.
7787 This avoids a huge runtime penalty due to the general
7788 inability to perform store forwarding from smaller stores
7789 to a larger load. */
7790 unsigned lsize
7791 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7792 unsigned int lnunits = const_nunits / group_size;
7793 /* If we can't construct such a vector fall back to
7794 element loads of the original vector type. */
7795 if (int_mode_for_size (lsize, 0).exists (&elmode)
7796 && mode_for_vector (elmode, lnunits).exists (&vmode)
7797 && VECTOR_MODE_P (vmode)
7798 && targetm.vector_mode_supported_p (vmode)
7799 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7800 != CODE_FOR_nothing))
7802 nloads = lnunits;
7803 lnel = group_size;
7804 ltype = build_nonstandard_integer_type (lsize, 1);
7805 lvectype = build_vector_type (ltype, nloads);
7809 else
7811 nloads = 1;
7812 lnel = const_nunits;
7813 ltype = vectype;
7815 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7817 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
7818 else if (nloads == 1)
7819 ltype = vectype;
7821 if (slp)
7823 /* For SLP permutation support we need to load the whole group,
7824 not only the number of vector stmts the permutation result
7825 fits in. */
7826 if (slp_perm)
7828 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7829 variable VF. */
7830 unsigned int const_vf = vf.to_constant ();
7831 ncopies = CEIL (group_size * const_vf, const_nunits);
7832 dr_chain.create (ncopies);
7834 else
7835 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7837 unsigned int group_el = 0;
7838 unsigned HOST_WIDE_INT
7839 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7840 for (j = 0; j < ncopies; j++)
7842 if (nloads > 1)
7843 vec_alloc (v, nloads);
7844 stmt_vec_info new_stmt_info = NULL;
7845 for (i = 0; i < nloads; i++)
7847 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7848 group_el * elsz + cst_offset);
7849 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7850 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7851 gassign *new_stmt
7852 = gimple_build_assign (make_ssa_name (ltype), data_ref);
7853 new_stmt_info
7854 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7855 if (nloads > 1)
7856 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7857 gimple_assign_lhs (new_stmt));
7859 group_el += lnel;
7860 if (! slp
7861 || group_el == group_size)
7863 tree newoff = copy_ssa_name (running_off);
7864 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7865 running_off, stride_step);
7866 vect_finish_stmt_generation (stmt_info, incr, gsi);
7868 running_off = newoff;
7869 group_el = 0;
7872 if (nloads > 1)
7874 tree vec_inv = build_constructor (lvectype, v);
7875 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
7876 new_stmt_info = vinfo->lookup_def (new_temp);
7877 if (lvectype != vectype)
7879 gassign *new_stmt
7880 = gimple_build_assign (make_ssa_name (vectype),
7881 VIEW_CONVERT_EXPR,
7882 build1 (VIEW_CONVERT_EXPR,
7883 vectype, new_temp));
7884 new_stmt_info
7885 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7889 if (slp)
7891 if (slp_perm)
7892 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
7893 else
7894 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7896 else
7898 if (j == 0)
7899 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7900 else
7901 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7902 prev_stmt_info = new_stmt_info;
7905 if (slp_perm)
7907 unsigned n_perms;
7908 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7909 slp_node_instance, false, &n_perms);
7911 return true;
7914 if (memory_access_type == VMAT_GATHER_SCATTER
7915 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7916 grouped_load = false;
7918 if (grouped_load)
7920 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7921 group_size = DR_GROUP_SIZE (first_stmt_info);
7922 /* For SLP vectorization we directly vectorize a subchain
7923 without permutation. */
7924 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7925 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7926 /* For BB vectorization always use the first stmt to base
7927 the data ref pointer on. */
7928 if (bb_vinfo)
7929 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7931 /* Check if the chain of loads is already vectorized. */
7932 if (STMT_VINFO_VEC_STMT (first_stmt_info)
7933 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7934 ??? But we can only do so if there is exactly one
7935 as we have no way to get at the rest. Leave the CSE
7936 opportunity alone.
7937 ??? With the group load eventually participating
7938 in multiple different permutations (having multiple
7939 slp nodes which refer to the same group) the CSE
7940 is even wrong code. See PR56270. */
7941 && !slp)
7943 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7944 return true;
7946 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7947 group_gap_adj = 0;
7949 /* VEC_NUM is the number of vect stmts to be created for this group. */
7950 if (slp)
7952 grouped_load = false;
7953 /* If an SLP permutation is from N elements to N elements,
7954 and if one vector holds a whole number of N, we can load
7955 the inputs to the permutation in the same way as an
7956 unpermuted sequence. In other cases we need to load the
7957 whole group, not only the number of vector stmts the
7958 permutation result fits in. */
7959 if (slp_perm
7960 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
7961 || !multiple_p (nunits, group_size)))
7963 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
7964 variable VF; see vect_transform_slp_perm_load. */
7965 unsigned int const_vf = vf.to_constant ();
7966 unsigned int const_nunits = nunits.to_constant ();
7967 vec_num = CEIL (group_size * const_vf, const_nunits);
7968 group_gap_adj = vf * group_size - nunits * vec_num;
7970 else
7972 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7973 group_gap_adj
7974 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7977 else
7978 vec_num = group_size;
7980 ref_type = get_group_alias_ptr_type (first_stmt_info);
7982 else
7984 first_stmt_info = stmt_info;
7985 first_dr_info = dr_info;
7986 group_size = vec_num = 1;
7987 group_gap_adj = 0;
7988 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7991 alignment_support_scheme
7992 = vect_supportable_dr_alignment (first_dr_info, false);
7993 gcc_assert (alignment_support_scheme);
7994 vec_loop_masks *loop_masks
7995 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7996 ? &LOOP_VINFO_MASKS (loop_vinfo)
7997 : NULL);
7998 /* Targets with store-lane instructions must not require explicit
7999 realignment. vect_supportable_dr_alignment always returns either
8000 dr_aligned or dr_unaligned_supported for masked operations. */
8001 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8002 && !mask
8003 && !loop_masks)
8004 || alignment_support_scheme == dr_aligned
8005 || alignment_support_scheme == dr_unaligned_supported);
8007 /* In case the vectorization factor (VF) is bigger than the number
8008 of elements that we can fit in a vectype (nunits), we have to generate
8009 more than one vector stmt - i.e - we need to "unroll" the
8010 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8011 from one copy of the vector stmt to the next, in the field
8012 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8013 stages to find the correct vector defs to be used when vectorizing
8014 stmts that use the defs of the current stmt. The example below
8015 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8016 need to create 4 vectorized stmts):
8018 before vectorization:
8019 RELATED_STMT VEC_STMT
8020 S1: x = memref - -
8021 S2: z = x + 1 - -
8023 step 1: vectorize stmt S1:
8024 We first create the vector stmt VS1_0, and, as usual, record a
8025 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8026 Next, we create the vector stmt VS1_1, and record a pointer to
8027 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8028 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8029 stmts and pointers:
8030 RELATED_STMT VEC_STMT
8031 VS1_0: vx0 = memref0 VS1_1 -
8032 VS1_1: vx1 = memref1 VS1_2 -
8033 VS1_2: vx2 = memref2 VS1_3 -
8034 VS1_3: vx3 = memref3 - -
8035 S1: x = load - VS1_0
8036 S2: z = x + 1 - -
8038 See in documentation in vect_get_vec_def_for_stmt_copy for how the
8039 information we recorded in RELATED_STMT field is used to vectorize
8040 stmt S2. */
8042 /* In case of interleaving (non-unit grouped access):
8044 S1: x2 = &base + 2
8045 S2: x0 = &base
8046 S3: x1 = &base + 1
8047 S4: x3 = &base + 3
8049 Vectorized loads are created in the order of memory accesses
8050 starting from the access of the first stmt of the chain:
8052 VS1: vx0 = &base
8053 VS2: vx1 = &base + vec_size*1
8054 VS3: vx3 = &base + vec_size*2
8055 VS4: vx4 = &base + vec_size*3
8057 Then permutation statements are generated:
8059 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8060 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8063 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8064 (the order of the data-refs in the output of vect_permute_load_chain
8065 corresponds to the order of scalar stmts in the interleaving chain - see
8066 the documentation of vect_permute_load_chain()).
8067 The generation of permutation stmts and recording them in
8068 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8070 In case of both multiple types and interleaving, the vector loads and
8071 permutation stmts above are created for every copy. The result vector
8072 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8073 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8075 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8076 on a target that supports unaligned accesses (dr_unaligned_supported)
8077 we generate the following code:
8078 p = initial_addr;
8079 indx = 0;
8080 loop {
8081 p = p + indx * vectype_size;
8082 vec_dest = *(p);
8083 indx = indx + 1;
8086 Otherwise, the data reference is potentially unaligned on a target that
8087 does not support unaligned accesses (dr_explicit_realign_optimized) -
8088 then generate the following code, in which the data in each iteration is
8089 obtained by two vector loads, one from the previous iteration, and one
8090 from the current iteration:
8091 p1 = initial_addr;
8092 msq_init = *(floor(p1))
8093 p2 = initial_addr + VS - 1;
8094 realignment_token = call target_builtin;
8095 indx = 0;
8096 loop {
8097 p2 = p2 + indx * vectype_size
8098 lsq = *(floor(p2))
8099 vec_dest = realign_load (msq, lsq, realignment_token)
8100 indx = indx + 1;
8101 msq = lsq;
8102 } */
8104 /* If the misalignment remains the same throughout the execution of the
8105 loop, we can create the init_addr and permutation mask at the loop
8106 preheader. Otherwise, it needs to be created inside the loop.
8107 This can only occur when vectorizing memory accesses in the inner-loop
8108 nested within an outer-loop that is being vectorized. */
8110 if (nested_in_vect_loop
8111 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
8112 GET_MODE_SIZE (TYPE_MODE (vectype))))
8114 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8115 compute_in_loop = true;
8118 if ((alignment_support_scheme == dr_explicit_realign_optimized
8119 || alignment_support_scheme == dr_explicit_realign)
8120 && !compute_in_loop)
8122 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
8123 alignment_support_scheme, NULL_TREE,
8124 &at_loop);
8125 if (alignment_support_scheme == dr_explicit_realign_optimized)
8127 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8128 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8129 size_one_node);
8132 else
8133 at_loop = loop;
8135 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8136 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8138 tree bump;
8139 tree vec_offset = NULL_TREE;
8140 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8142 aggr_type = NULL_TREE;
8143 bump = NULL_TREE;
8145 else if (memory_access_type == VMAT_GATHER_SCATTER)
8147 aggr_type = elem_type;
8148 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8149 &bump, &vec_offset);
8151 else
8153 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8154 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8155 else
8156 aggr_type = vectype;
8157 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8158 memory_access_type);
8161 tree vec_mask = NULL_TREE;
8162 prev_stmt_info = NULL;
8163 poly_uint64 group_elt = 0;
8164 for (j = 0; j < ncopies; j++)
8166 stmt_vec_info new_stmt_info = NULL;
8167 /* 1. Create the vector or array pointer update chain. */
8168 if (j == 0)
8170 bool simd_lane_access_p
8171 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8172 if (simd_lane_access_p
8173 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8174 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8175 && integer_zerop (DR_OFFSET (first_dr_info->dr))
8176 && integer_zerop (DR_INIT (first_dr_info->dr))
8177 && alias_sets_conflict_p (get_alias_set (aggr_type),
8178 get_alias_set (TREE_TYPE (ref_type)))
8179 && (alignment_support_scheme == dr_aligned
8180 || alignment_support_scheme == dr_unaligned_supported))
8182 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8183 dataref_offset = build_int_cst (ref_type, 0);
8185 else if (first_stmt_info_for_drptr
8186 && first_stmt_info != first_stmt_info_for_drptr)
8188 dataref_ptr
8189 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8190 aggr_type, at_loop, offset, &dummy,
8191 gsi, &ptr_incr, simd_lane_access_p,
8192 byte_offset, bump);
8193 /* Adjust the pointer by the difference to first_stmt. */
8194 data_reference_p ptrdr
8195 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
8196 tree diff
8197 = fold_convert (sizetype,
8198 size_binop (MINUS_EXPR,
8199 DR_INIT (first_dr_info->dr),
8200 DR_INIT (ptrdr)));
8201 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8202 stmt_info, diff);
8204 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8205 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8206 &dataref_ptr, &vec_offset);
8207 else
8208 dataref_ptr
8209 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
8210 offset, &dummy, gsi, &ptr_incr,
8211 simd_lane_access_p,
8212 byte_offset, bump);
8213 if (mask)
8214 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8215 mask_vectype);
8217 else
8219 if (dataref_offset)
8220 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8221 bump);
8222 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8223 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8224 else
8225 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8226 stmt_info, bump);
8227 if (mask)
8228 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8231 if (grouped_load || slp_perm)
8232 dr_chain.create (vec_num);
8234 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8236 tree vec_array;
8238 vec_array = create_vector_array (vectype, vec_num);
8240 tree final_mask = NULL_TREE;
8241 if (loop_masks)
8242 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8243 vectype, j);
8244 if (vec_mask)
8245 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8246 vec_mask, gsi);
8248 gcall *call;
8249 if (final_mask)
8251 /* Emit:
8252 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8253 VEC_MASK). */
8254 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8255 tree alias_ptr = build_int_cst (ref_type, align);
8256 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8257 dataref_ptr, alias_ptr,
8258 final_mask);
8260 else
8262 /* Emit:
8263 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8264 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8265 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8267 gimple_call_set_lhs (call, vec_array);
8268 gimple_call_set_nothrow (call, true);
8269 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8271 /* Extract each vector into an SSA_NAME. */
8272 for (i = 0; i < vec_num; i++)
8274 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
8275 vec_array, i);
8276 dr_chain.quick_push (new_temp);
8279 /* Record the mapping between SSA_NAMEs and statements. */
8280 vect_record_grouped_load_vectors (stmt_info, dr_chain);
8282 /* Record that VEC_ARRAY is now dead. */
8283 vect_clobber_variable (stmt_info, gsi, vec_array);
8285 else
8287 for (i = 0; i < vec_num; i++)
8289 tree final_mask = NULL_TREE;
8290 if (loop_masks
8291 && memory_access_type != VMAT_INVARIANT)
8292 final_mask = vect_get_loop_mask (gsi, loop_masks,
8293 vec_num * ncopies,
8294 vectype, vec_num * j + i);
8295 if (vec_mask)
8296 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8297 vec_mask, gsi);
8299 if (i > 0)
8300 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8301 stmt_info, bump);
8303 /* 2. Create the vector-load in the loop. */
8304 gimple *new_stmt = NULL;
8305 switch (alignment_support_scheme)
8307 case dr_aligned:
8308 case dr_unaligned_supported:
8310 unsigned int misalign;
8311 unsigned HOST_WIDE_INT align;
8313 if (memory_access_type == VMAT_GATHER_SCATTER)
8315 tree scale = size_int (gs_info.scale);
8316 gcall *call;
8317 if (loop_masks)
8318 call = gimple_build_call_internal
8319 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8320 vec_offset, scale, final_mask);
8321 else
8322 call = gimple_build_call_internal
8323 (IFN_GATHER_LOAD, 3, dataref_ptr,
8324 vec_offset, scale);
8325 gimple_call_set_nothrow (call, true);
8326 new_stmt = call;
8327 data_ref = NULL_TREE;
8328 break;
8331 align =
8332 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8333 if (alignment_support_scheme == dr_aligned)
8335 gcc_assert (aligned_access_p (first_dr_info));
8336 misalign = 0;
8338 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8340 align = dr_alignment
8341 (vect_dr_behavior (first_dr_info));
8342 misalign = 0;
8344 else
8345 misalign = DR_MISALIGNMENT (first_dr_info);
8346 if (dataref_offset == NULL_TREE
8347 && TREE_CODE (dataref_ptr) == SSA_NAME)
8348 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8349 align, misalign);
8351 if (final_mask)
8353 align = least_bit_hwi (misalign | align);
8354 tree ptr = build_int_cst (ref_type, align);
8355 gcall *call
8356 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8357 dataref_ptr, ptr,
8358 final_mask);
8359 gimple_call_set_nothrow (call, true);
8360 new_stmt = call;
8361 data_ref = NULL_TREE;
8363 else
8365 data_ref
8366 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8367 dataref_offset
8368 ? dataref_offset
8369 : build_int_cst (ref_type, 0));
8370 if (alignment_support_scheme == dr_aligned)
8372 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8373 TREE_TYPE (data_ref)
8374 = build_aligned_type (TREE_TYPE (data_ref),
8375 align * BITS_PER_UNIT);
8376 else
8377 TREE_TYPE (data_ref)
8378 = build_aligned_type (TREE_TYPE (data_ref),
8379 TYPE_ALIGN (elem_type));
8381 break;
8383 case dr_explicit_realign:
8385 tree ptr, bump;
8387 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8389 if (compute_in_loop)
8390 msq = vect_setup_realignment (first_stmt_info, gsi,
8391 &realignment_token,
8392 dr_explicit_realign,
8393 dataref_ptr, NULL);
8395 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8396 ptr = copy_ssa_name (dataref_ptr);
8397 else
8398 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8399 // For explicit realign the target alignment should be
8400 // known at compile time.
8401 unsigned HOST_WIDE_INT align =
8402 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8403 new_stmt = gimple_build_assign
8404 (ptr, BIT_AND_EXPR, dataref_ptr,
8405 build_int_cst
8406 (TREE_TYPE (dataref_ptr),
8407 -(HOST_WIDE_INT) align));
8408 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8409 data_ref
8410 = build2 (MEM_REF, vectype, ptr,
8411 build_int_cst (ref_type, 0));
8412 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8413 vec_dest = vect_create_destination_var (scalar_dest,
8414 vectype);
8415 new_stmt = gimple_build_assign (vec_dest, data_ref);
8416 new_temp = make_ssa_name (vec_dest, new_stmt);
8417 gimple_assign_set_lhs (new_stmt, new_temp);
8418 gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
8419 gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
8420 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8421 msq = new_temp;
8423 bump = size_binop (MULT_EXPR, vs,
8424 TYPE_SIZE_UNIT (elem_type));
8425 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8426 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
8427 stmt_info, bump);
8428 new_stmt = gimple_build_assign
8429 (NULL_TREE, BIT_AND_EXPR, ptr,
8430 build_int_cst
8431 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8432 ptr = copy_ssa_name (ptr, new_stmt);
8433 gimple_assign_set_lhs (new_stmt, ptr);
8434 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8435 data_ref
8436 = build2 (MEM_REF, vectype, ptr,
8437 build_int_cst (ref_type, 0));
8438 break;
8440 case dr_explicit_realign_optimized:
8442 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8443 new_temp = copy_ssa_name (dataref_ptr);
8444 else
8445 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8446 // We should only be doing this if we know the target
8447 // alignment at compile time.
8448 unsigned HOST_WIDE_INT align =
8449 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8450 new_stmt = gimple_build_assign
8451 (new_temp, BIT_AND_EXPR, dataref_ptr,
8452 build_int_cst (TREE_TYPE (dataref_ptr),
8453 -(HOST_WIDE_INT) align));
8454 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8455 data_ref
8456 = build2 (MEM_REF, vectype, new_temp,
8457 build_int_cst (ref_type, 0));
8458 break;
8460 default:
8461 gcc_unreachable ();
8463 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8464 /* DATA_REF is null if we've already built the statement. */
8465 if (data_ref)
8467 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8468 new_stmt = gimple_build_assign (vec_dest, data_ref);
8470 new_temp = make_ssa_name (vec_dest, new_stmt);
8471 gimple_set_lhs (new_stmt, new_temp);
8472 new_stmt_info
8473 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8475 /* 3. Handle explicit realignment if necessary/supported.
8476 Create in loop:
8477 vec_dest = realign_load (msq, lsq, realignment_token) */
8478 if (alignment_support_scheme == dr_explicit_realign_optimized
8479 || alignment_support_scheme == dr_explicit_realign)
8481 lsq = gimple_assign_lhs (new_stmt);
8482 if (!realignment_token)
8483 realignment_token = dataref_ptr;
8484 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8485 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8486 msq, lsq, realignment_token);
8487 new_temp = make_ssa_name (vec_dest, new_stmt);
8488 gimple_assign_set_lhs (new_stmt, new_temp);
8489 new_stmt_info
8490 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8492 if (alignment_support_scheme == dr_explicit_realign_optimized)
8494 gcc_assert (phi);
8495 if (i == vec_num - 1 && j == ncopies - 1)
8496 add_phi_arg (phi, lsq,
8497 loop_latch_edge (containing_loop),
8498 UNKNOWN_LOCATION);
8499 msq = lsq;
8503 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8505 tree perm_mask = perm_mask_for_reverse (vectype);
8506 new_temp = permute_vec_elements (new_temp, new_temp,
8507 perm_mask, stmt_info, gsi);
8508 new_stmt_info = vinfo->lookup_def (new_temp);
8511 /* Collect vector loads and later create their permutation in
8512 vect_transform_grouped_load (). */
8513 if (grouped_load || slp_perm)
8514 dr_chain.quick_push (new_temp);
8516 /* Store vector loads in the corresponding SLP_NODE. */
8517 if (slp && !slp_perm)
8518 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8520 /* With SLP permutation we load the gaps as well, without
8521 we need to skip the gaps after we manage to fully load
8522 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8523 group_elt += nunits;
8524 if (maybe_ne (group_gap_adj, 0U)
8525 && !slp_perm
8526 && known_eq (group_elt, group_size - group_gap_adj))
8528 poly_wide_int bump_val
8529 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8530 * group_gap_adj);
8531 tree bump = wide_int_to_tree (sizetype, bump_val);
8532 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8533 stmt_info, bump);
8534 group_elt = 0;
8537 /* Bump the vector pointer to account for a gap or for excess
8538 elements loaded for a permuted SLP load. */
8539 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8541 poly_wide_int bump_val
8542 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8543 * group_gap_adj);
8544 tree bump = wide_int_to_tree (sizetype, bump_val);
8545 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8546 stmt_info, bump);
8550 if (slp && !slp_perm)
8551 continue;
8553 if (slp_perm)
8555 unsigned n_perms;
8556 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8557 slp_node_instance, false,
8558 &n_perms))
8560 dr_chain.release ();
8561 return false;
8564 else
8566 if (grouped_load)
8568 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8569 vect_transform_grouped_load (stmt_info, dr_chain,
8570 group_size, gsi);
8571 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8573 else
8575 if (j == 0)
8576 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8577 else
8578 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8579 prev_stmt_info = new_stmt_info;
8582 dr_chain.release ();
8585 return true;
8588 /* Function vect_is_simple_cond.
8590 Input:
8591 LOOP - the loop that is being vectorized.
8592 COND - Condition that is checked for simple use.
8594 Output:
8595 *COMP_VECTYPE - the vector type for the comparison.
8596 *DTS - The def types for the arguments of the comparison
8598 Returns whether a COND can be vectorized. Checks whether
8599 condition operands are supportable using vec_is_simple_use. */
8601 static bool
8602 vect_is_simple_cond (tree cond, vec_info *vinfo,
8603 tree *comp_vectype, enum vect_def_type *dts,
8604 tree vectype)
8606 tree lhs, rhs;
8607 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8609 /* Mask case. */
8610 if (TREE_CODE (cond) == SSA_NAME
8611 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8613 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8614 || !*comp_vectype
8615 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8616 return false;
8617 return true;
8620 if (!COMPARISON_CLASS_P (cond))
8621 return false;
8623 lhs = TREE_OPERAND (cond, 0);
8624 rhs = TREE_OPERAND (cond, 1);
8626 if (TREE_CODE (lhs) == SSA_NAME)
8628 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8629 return false;
8631 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8632 || TREE_CODE (lhs) == FIXED_CST)
8633 dts[0] = vect_constant_def;
8634 else
8635 return false;
8637 if (TREE_CODE (rhs) == SSA_NAME)
8639 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8640 return false;
8642 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8643 || TREE_CODE (rhs) == FIXED_CST)
8644 dts[1] = vect_constant_def;
8645 else
8646 return false;
8648 if (vectype1 && vectype2
8649 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8650 TYPE_VECTOR_SUBPARTS (vectype2)))
8651 return false;
8653 *comp_vectype = vectype1 ? vectype1 : vectype2;
8654 /* Invariant comparison. */
8655 if (! *comp_vectype && vectype)
8657 tree scalar_type = TREE_TYPE (lhs);
8658 /* If we can widen the comparison to match vectype do so. */
8659 if (INTEGRAL_TYPE_P (scalar_type)
8660 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8661 TYPE_SIZE (TREE_TYPE (vectype))))
8662 scalar_type = build_nonstandard_integer_type
8663 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8664 TYPE_UNSIGNED (scalar_type));
8665 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8668 return true;
8671 /* vectorizable_condition.
8673 Check if STMT_INFO is conditional modify expression that can be vectorized.
8674 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8675 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8676 at GSI.
8678 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
8680 Return true if STMT_INFO is vectorizable in this way. */
8682 bool
8683 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8684 stmt_vec_info *vec_stmt, bool for_reduction,
8685 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
8687 vec_info *vinfo = stmt_info->vinfo;
8688 tree scalar_dest = NULL_TREE;
8689 tree vec_dest = NULL_TREE;
8690 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8691 tree then_clause, else_clause;
8692 tree comp_vectype = NULL_TREE;
8693 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8694 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8695 tree vec_compare;
8696 tree new_temp;
8697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8698 enum vect_def_type dts[4]
8699 = {vect_unknown_def_type, vect_unknown_def_type,
8700 vect_unknown_def_type, vect_unknown_def_type};
8701 int ndts = 4;
8702 int ncopies;
8703 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8704 stmt_vec_info prev_stmt_info = NULL;
8705 int i, j;
8706 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8707 vec<tree> vec_oprnds0 = vNULL;
8708 vec<tree> vec_oprnds1 = vNULL;
8709 vec<tree> vec_oprnds2 = vNULL;
8710 vec<tree> vec_oprnds3 = vNULL;
8711 tree vec_cmp_type;
8712 bool masked = false;
8714 if (for_reduction && STMT_SLP_TYPE (stmt_info))
8715 return false;
8717 vect_reduction_type reduction_type
8718 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8719 if (reduction_type == TREE_CODE_REDUCTION)
8721 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8722 return false;
8724 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8725 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8726 && for_reduction))
8727 return false;
8729 /* FORNOW: not yet supported. */
8730 if (STMT_VINFO_LIVE_P (stmt_info))
8732 if (dump_enabled_p ())
8733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8734 "value used after loop.\n");
8735 return false;
8739 /* Is vectorizable conditional operation? */
8740 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
8741 if (!stmt)
8742 return false;
8744 code = gimple_assign_rhs_code (stmt);
8746 if (code != COND_EXPR)
8747 return false;
8749 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8750 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8752 if (slp_node)
8753 ncopies = 1;
8754 else
8755 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8757 gcc_assert (ncopies >= 1);
8758 if (for_reduction && ncopies > 1)
8759 return false; /* FORNOW */
8761 cond_expr = gimple_assign_rhs1 (stmt);
8762 then_clause = gimple_assign_rhs2 (stmt);
8763 else_clause = gimple_assign_rhs3 (stmt);
8765 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8766 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8767 || !comp_vectype)
8768 return false;
8770 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8771 return false;
8772 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8773 return false;
8775 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8776 return false;
8778 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8779 return false;
8781 masked = !COMPARISON_CLASS_P (cond_expr);
8782 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8784 if (vec_cmp_type == NULL_TREE)
8785 return false;
8787 cond_code = TREE_CODE (cond_expr);
8788 if (!masked)
8790 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8791 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8794 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8796 /* Boolean values may have another representation in vectors
8797 and therefore we prefer bit operations over comparison for
8798 them (which also works for scalar masks). We store opcodes
8799 to use in bitop1 and bitop2. Statement is vectorized as
8800 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8801 depending on bitop1 and bitop2 arity. */
8802 switch (cond_code)
8804 case GT_EXPR:
8805 bitop1 = BIT_NOT_EXPR;
8806 bitop2 = BIT_AND_EXPR;
8807 break;
8808 case GE_EXPR:
8809 bitop1 = BIT_NOT_EXPR;
8810 bitop2 = BIT_IOR_EXPR;
8811 break;
8812 case LT_EXPR:
8813 bitop1 = BIT_NOT_EXPR;
8814 bitop2 = BIT_AND_EXPR;
8815 std::swap (cond_expr0, cond_expr1);
8816 break;
8817 case LE_EXPR:
8818 bitop1 = BIT_NOT_EXPR;
8819 bitop2 = BIT_IOR_EXPR;
8820 std::swap (cond_expr0, cond_expr1);
8821 break;
8822 case NE_EXPR:
8823 bitop1 = BIT_XOR_EXPR;
8824 break;
8825 case EQ_EXPR:
8826 bitop1 = BIT_XOR_EXPR;
8827 bitop2 = BIT_NOT_EXPR;
8828 break;
8829 default:
8830 return false;
8832 cond_code = SSA_NAME;
8835 if (!vec_stmt)
8837 if (bitop1 != NOP_EXPR)
8839 machine_mode mode = TYPE_MODE (comp_vectype);
8840 optab optab;
8842 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8843 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8844 return false;
8846 if (bitop2 != NOP_EXPR)
8848 optab = optab_for_tree_code (bitop2, comp_vectype,
8849 optab_default);
8850 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8851 return false;
8854 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8855 cond_code))
8857 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8858 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8859 cost_vec);
8860 return true;
8862 return false;
8865 /* Transform. */
8867 if (!slp_node)
8869 vec_oprnds0.create (1);
8870 vec_oprnds1.create (1);
8871 vec_oprnds2.create (1);
8872 vec_oprnds3.create (1);
8875 /* Handle def. */
8876 scalar_dest = gimple_assign_lhs (stmt);
8877 if (reduction_type != EXTRACT_LAST_REDUCTION)
8878 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8880 /* Handle cond expr. */
8881 for (j = 0; j < ncopies; j++)
8883 stmt_vec_info new_stmt_info = NULL;
8884 if (j == 0)
8886 if (slp_node)
8888 auto_vec<tree, 4> ops;
8889 auto_vec<vec<tree>, 4> vec_defs;
8891 if (masked)
8892 ops.safe_push (cond_expr);
8893 else
8895 ops.safe_push (cond_expr0);
8896 ops.safe_push (cond_expr1);
8898 ops.safe_push (then_clause);
8899 ops.safe_push (else_clause);
8900 vect_get_slp_defs (ops, slp_node, &vec_defs);
8901 vec_oprnds3 = vec_defs.pop ();
8902 vec_oprnds2 = vec_defs.pop ();
8903 if (!masked)
8904 vec_oprnds1 = vec_defs.pop ();
8905 vec_oprnds0 = vec_defs.pop ();
8907 else
8909 if (masked)
8911 vec_cond_lhs
8912 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
8913 comp_vectype);
8915 else
8917 vec_cond_lhs
8918 = vect_get_vec_def_for_operand (cond_expr0,
8919 stmt_info, comp_vectype);
8920 vec_cond_rhs
8921 = vect_get_vec_def_for_operand (cond_expr1,
8922 stmt_info, comp_vectype);
8924 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8925 stmt_info);
8926 if (reduction_type != EXTRACT_LAST_REDUCTION)
8927 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8928 stmt_info);
8931 else
8933 vec_cond_lhs
8934 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
8935 if (!masked)
8936 vec_cond_rhs
8937 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
8939 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
8940 vec_oprnds2.pop ());
8941 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
8942 vec_oprnds3.pop ());
8945 if (!slp_node)
8947 vec_oprnds0.quick_push (vec_cond_lhs);
8948 if (!masked)
8949 vec_oprnds1.quick_push (vec_cond_rhs);
8950 vec_oprnds2.quick_push (vec_then_clause);
8951 vec_oprnds3.quick_push (vec_else_clause);
8954 /* Arguments are ready. Create the new vector stmt. */
8955 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8957 vec_then_clause = vec_oprnds2[i];
8958 vec_else_clause = vec_oprnds3[i];
8960 if (masked)
8961 vec_compare = vec_cond_lhs;
8962 else
8964 vec_cond_rhs = vec_oprnds1[i];
8965 if (bitop1 == NOP_EXPR)
8966 vec_compare = build2 (cond_code, vec_cmp_type,
8967 vec_cond_lhs, vec_cond_rhs);
8968 else
8970 new_temp = make_ssa_name (vec_cmp_type);
8971 gassign *new_stmt;
8972 if (bitop1 == BIT_NOT_EXPR)
8973 new_stmt = gimple_build_assign (new_temp, bitop1,
8974 vec_cond_rhs);
8975 else
8976 new_stmt
8977 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8978 vec_cond_rhs);
8979 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8980 if (bitop2 == NOP_EXPR)
8981 vec_compare = new_temp;
8982 else if (bitop2 == BIT_NOT_EXPR)
8984 /* Instead of doing ~x ? y : z do x ? z : y. */
8985 vec_compare = new_temp;
8986 std::swap (vec_then_clause, vec_else_clause);
8988 else
8990 vec_compare = make_ssa_name (vec_cmp_type);
8991 new_stmt
8992 = gimple_build_assign (vec_compare, bitop2,
8993 vec_cond_lhs, new_temp);
8994 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8998 if (reduction_type == EXTRACT_LAST_REDUCTION)
9000 if (!is_gimple_val (vec_compare))
9002 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9003 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9004 vec_compare);
9005 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9006 vec_compare = vec_compare_name;
9008 gcall *new_stmt = gimple_build_call_internal
9009 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9010 vec_then_clause);
9011 gimple_call_set_lhs (new_stmt, scalar_dest);
9012 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9013 if (stmt_info->stmt == gsi_stmt (*gsi))
9014 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
9015 else
9017 /* In this case we're moving the definition to later in the
9018 block. That doesn't matter because the only uses of the
9019 lhs are in phi statements. */
9020 gimple_stmt_iterator old_gsi
9021 = gsi_for_stmt (stmt_info->stmt);
9022 gsi_remove (&old_gsi, true);
9023 new_stmt_info
9024 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9027 else
9029 new_temp = make_ssa_name (vec_dest);
9030 gassign *new_stmt
9031 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9032 vec_then_clause, vec_else_clause);
9033 new_stmt_info
9034 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9036 if (slp_node)
9037 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9040 if (slp_node)
9041 continue;
9043 if (j == 0)
9044 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9045 else
9046 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9048 prev_stmt_info = new_stmt_info;
9051 vec_oprnds0.release ();
9052 vec_oprnds1.release ();
9053 vec_oprnds2.release ();
9054 vec_oprnds3.release ();
9056 return true;
9059 /* vectorizable_comparison.
9061 Check if STMT_INFO is comparison expression that can be vectorized.
9062 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9063 comparison, put it in VEC_STMT, and insert it at GSI.
9065 Return true if STMT_INFO is vectorizable in this way. */
9067 static bool
9068 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9069 stmt_vec_info *vec_stmt,
9070 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9072 vec_info *vinfo = stmt_info->vinfo;
9073 tree lhs, rhs1, rhs2;
9074 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9075 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9076 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9077 tree new_temp;
9078 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9079 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9080 int ndts = 2;
9081 poly_uint64 nunits;
9082 int ncopies;
9083 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9084 stmt_vec_info prev_stmt_info = NULL;
9085 int i, j;
9086 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9087 vec<tree> vec_oprnds0 = vNULL;
9088 vec<tree> vec_oprnds1 = vNULL;
9089 tree mask_type;
9090 tree mask;
9092 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9093 return false;
9095 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9096 return false;
9098 mask_type = vectype;
9099 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9101 if (slp_node)
9102 ncopies = 1;
9103 else
9104 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9106 gcc_assert (ncopies >= 1);
9107 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9108 return false;
9110 if (STMT_VINFO_LIVE_P (stmt_info))
9112 if (dump_enabled_p ())
9113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9114 "value used after loop.\n");
9115 return false;
9118 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9119 if (!stmt)
9120 return false;
9122 code = gimple_assign_rhs_code (stmt);
9124 if (TREE_CODE_CLASS (code) != tcc_comparison)
9125 return false;
9127 rhs1 = gimple_assign_rhs1 (stmt);
9128 rhs2 = gimple_assign_rhs2 (stmt);
9130 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9131 return false;
9133 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9134 return false;
9136 if (vectype1 && vectype2
9137 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9138 TYPE_VECTOR_SUBPARTS (vectype2)))
9139 return false;
9141 vectype = vectype1 ? vectype1 : vectype2;
9143 /* Invariant comparison. */
9144 if (!vectype)
9146 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9147 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9148 return false;
9150 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9151 return false;
9153 /* Can't compare mask and non-mask types. */
9154 if (vectype1 && vectype2
9155 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9156 return false;
9158 /* Boolean values may have another representation in vectors
9159 and therefore we prefer bit operations over comparison for
9160 them (which also works for scalar masks). We store opcodes
9161 to use in bitop1 and bitop2. Statement is vectorized as
9162 BITOP2 (rhs1 BITOP1 rhs2) or
9163 rhs1 BITOP2 (BITOP1 rhs2)
9164 depending on bitop1 and bitop2 arity. */
9165 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9167 if (code == GT_EXPR)
9169 bitop1 = BIT_NOT_EXPR;
9170 bitop2 = BIT_AND_EXPR;
9172 else if (code == GE_EXPR)
9174 bitop1 = BIT_NOT_EXPR;
9175 bitop2 = BIT_IOR_EXPR;
9177 else if (code == LT_EXPR)
9179 bitop1 = BIT_NOT_EXPR;
9180 bitop2 = BIT_AND_EXPR;
9181 std::swap (rhs1, rhs2);
9182 std::swap (dts[0], dts[1]);
9184 else if (code == LE_EXPR)
9186 bitop1 = BIT_NOT_EXPR;
9187 bitop2 = BIT_IOR_EXPR;
9188 std::swap (rhs1, rhs2);
9189 std::swap (dts[0], dts[1]);
9191 else
9193 bitop1 = BIT_XOR_EXPR;
9194 if (code == EQ_EXPR)
9195 bitop2 = BIT_NOT_EXPR;
9199 if (!vec_stmt)
9201 if (bitop1 == NOP_EXPR)
9203 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9204 return false;
9206 else
9208 machine_mode mode = TYPE_MODE (vectype);
9209 optab optab;
9211 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9212 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9213 return false;
9215 if (bitop2 != NOP_EXPR)
9217 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9218 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9219 return false;
9223 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9224 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9225 dts, ndts, slp_node, cost_vec);
9226 return true;
9229 /* Transform. */
9230 if (!slp_node)
9232 vec_oprnds0.create (1);
9233 vec_oprnds1.create (1);
9236 /* Handle def. */
9237 lhs = gimple_assign_lhs (stmt);
9238 mask = vect_create_destination_var (lhs, mask_type);
9240 /* Handle cmp expr. */
9241 for (j = 0; j < ncopies; j++)
9243 stmt_vec_info new_stmt_info = NULL;
9244 if (j == 0)
9246 if (slp_node)
9248 auto_vec<tree, 2> ops;
9249 auto_vec<vec<tree>, 2> vec_defs;
9251 ops.safe_push (rhs1);
9252 ops.safe_push (rhs2);
9253 vect_get_slp_defs (ops, slp_node, &vec_defs);
9254 vec_oprnds1 = vec_defs.pop ();
9255 vec_oprnds0 = vec_defs.pop ();
9257 else
9259 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
9260 vectype);
9261 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
9262 vectype);
9265 else
9267 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
9268 vec_oprnds0.pop ());
9269 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
9270 vec_oprnds1.pop ());
9273 if (!slp_node)
9275 vec_oprnds0.quick_push (vec_rhs1);
9276 vec_oprnds1.quick_push (vec_rhs2);
9279 /* Arguments are ready. Create the new vector stmt. */
9280 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9282 vec_rhs2 = vec_oprnds1[i];
9284 new_temp = make_ssa_name (mask);
9285 if (bitop1 == NOP_EXPR)
9287 gassign *new_stmt = gimple_build_assign (new_temp, code,
9288 vec_rhs1, vec_rhs2);
9289 new_stmt_info
9290 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9292 else
9294 gassign *new_stmt;
9295 if (bitop1 == BIT_NOT_EXPR)
9296 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9297 else
9298 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9299 vec_rhs2);
9300 new_stmt_info
9301 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9302 if (bitop2 != NOP_EXPR)
9304 tree res = make_ssa_name (mask);
9305 if (bitop2 == BIT_NOT_EXPR)
9306 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9307 else
9308 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9309 new_temp);
9310 new_stmt_info
9311 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9314 if (slp_node)
9315 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9318 if (slp_node)
9319 continue;
9321 if (j == 0)
9322 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9323 else
9324 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9326 prev_stmt_info = new_stmt_info;
9329 vec_oprnds0.release ();
9330 vec_oprnds1.release ();
9332 return true;
9335 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9336 can handle all live statements in the node. Otherwise return true
9337 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9338 GSI and VEC_STMT are as for vectorizable_live_operation. */
9340 static bool
9341 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9342 slp_tree slp_node, stmt_vec_info *vec_stmt,
9343 stmt_vector_for_cost *cost_vec)
9345 if (slp_node)
9347 stmt_vec_info slp_stmt_info;
9348 unsigned int i;
9349 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
9351 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9352 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
9353 vec_stmt, cost_vec))
9354 return false;
9357 else if (STMT_VINFO_LIVE_P (stmt_info)
9358 && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
9359 vec_stmt, cost_vec))
9360 return false;
9362 return true;
9365 /* Make sure the statement is vectorizable. */
9367 opt_result
9368 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
9369 slp_tree node, slp_instance node_instance,
9370 stmt_vector_for_cost *cost_vec)
9372 vec_info *vinfo = stmt_info->vinfo;
9373 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9374 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9375 bool ok;
9376 gimple_seq pattern_def_seq;
9378 if (dump_enabled_p ())
9379 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
9380 stmt_info->stmt);
9382 if (gimple_has_volatile_ops (stmt_info->stmt))
9383 return opt_result::failure_at (stmt_info->stmt,
9384 "not vectorized:"
9385 " stmt has volatile operands: %G\n",
9386 stmt_info->stmt);
9388 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9389 && node == NULL
9390 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9392 gimple_stmt_iterator si;
9394 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9396 stmt_vec_info pattern_def_stmt_info
9397 = vinfo->lookup_stmt (gsi_stmt (si));
9398 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9399 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9401 /* Analyze def stmt of STMT if it's a pattern stmt. */
9402 if (dump_enabled_p ())
9403 dump_printf_loc (MSG_NOTE, vect_location,
9404 "==> examining pattern def statement: %G",
9405 pattern_def_stmt_info->stmt);
9407 opt_result res
9408 = vect_analyze_stmt (pattern_def_stmt_info,
9409 need_to_vectorize, node, node_instance,
9410 cost_vec);
9411 if (!res)
9412 return res;
9417 /* Skip stmts that do not need to be vectorized. In loops this is expected
9418 to include:
9419 - the COND_EXPR which is the loop exit condition
9420 - any LABEL_EXPRs in the loop
9421 - computations that are used only for array indexing or loop control.
9422 In basic blocks we only analyze statements that are a part of some SLP
9423 instance, therefore, all the statements are relevant.
9425 Pattern statement needs to be analyzed instead of the original statement
9426 if the original statement is not relevant. Otherwise, we analyze both
9427 statements. In basic blocks we are called from some SLP instance
9428 traversal, don't analyze pattern stmts instead, the pattern stmts
9429 already will be part of SLP instance. */
9431 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9432 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9433 && !STMT_VINFO_LIVE_P (stmt_info))
9435 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9436 && pattern_stmt_info
9437 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9438 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9440 /* Analyze PATTERN_STMT instead of the original stmt. */
9441 stmt_info = pattern_stmt_info;
9442 if (dump_enabled_p ())
9443 dump_printf_loc (MSG_NOTE, vect_location,
9444 "==> examining pattern statement: %G",
9445 stmt_info->stmt);
9447 else
9449 if (dump_enabled_p ())
9450 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9452 return opt_result::success ();
9455 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9456 && node == NULL
9457 && pattern_stmt_info
9458 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9459 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9461 /* Analyze PATTERN_STMT too. */
9462 if (dump_enabled_p ())
9463 dump_printf_loc (MSG_NOTE, vect_location,
9464 "==> examining pattern statement: %G",
9465 pattern_stmt_info->stmt);
9467 opt_result res
9468 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
9469 node_instance, cost_vec);
9470 if (!res)
9471 return res;
9474 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9476 case vect_internal_def:
9477 break;
9479 case vect_reduction_def:
9480 case vect_nested_cycle:
9481 gcc_assert (!bb_vinfo
9482 && (relevance == vect_used_in_outer
9483 || relevance == vect_used_in_outer_by_reduction
9484 || relevance == vect_used_by_reduction
9485 || relevance == vect_unused_in_scope
9486 || relevance == vect_used_only_live));
9487 break;
9489 case vect_induction_def:
9490 gcc_assert (!bb_vinfo);
9491 break;
9493 case vect_constant_def:
9494 case vect_external_def:
9495 case vect_unknown_def_type:
9496 default:
9497 gcc_unreachable ();
9500 if (STMT_VINFO_RELEVANT_P (stmt_info))
9502 tree type = gimple_expr_type (stmt_info->stmt);
9503 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
9504 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9505 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9506 || (call && gimple_call_lhs (call) == NULL_TREE));
9507 *need_to_vectorize = true;
9510 if (PURE_SLP_STMT (stmt_info) && !node)
9512 if (dump_enabled_p ())
9513 dump_printf_loc (MSG_NOTE, vect_location,
9514 "handled only by SLP analysis\n");
9515 return opt_result::success ();
9518 ok = true;
9519 if (!bb_vinfo
9520 && (STMT_VINFO_RELEVANT_P (stmt_info)
9521 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9522 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
9523 -mveclibabi= takes preference over library functions with
9524 the simd attribute. */
9525 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9526 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9527 cost_vec)
9528 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
9529 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9530 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
9531 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9532 cost_vec)
9533 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9534 || vectorizable_reduction (stmt_info, NULL, NULL, node,
9535 node_instance, cost_vec)
9536 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
9537 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9538 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9539 cost_vec)
9540 || vectorizable_comparison (stmt_info, NULL, NULL, node,
9541 cost_vec));
9542 else
9544 if (bb_vinfo)
9545 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9546 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9547 cost_vec)
9548 || vectorizable_conversion (stmt_info, NULL, NULL, node,
9549 cost_vec)
9550 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9551 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9552 || vectorizable_assignment (stmt_info, NULL, NULL, node,
9553 cost_vec)
9554 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9555 cost_vec)
9556 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9557 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9558 cost_vec)
9559 || vectorizable_comparison (stmt_info, NULL, NULL, node,
9560 cost_vec));
9563 if (!ok)
9564 return opt_result::failure_at (stmt_info->stmt,
9565 "not vectorized:"
9566 " relevant stmt not supported: %G",
9567 stmt_info->stmt);
9569 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9570 need extra handling, except for vectorizable reductions. */
9571 if (!bb_vinfo
9572 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9573 && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
9574 return opt_result::failure_at (stmt_info->stmt,
9575 "not vectorized:"
9576 " live stmt not supported: %G",
9577 stmt_info->stmt);
9579 return opt_result::success ();
9583 /* Function vect_transform_stmt.
9585 Create a vectorized stmt to replace STMT_INFO, and insert it at BSI. */
9587 bool
9588 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9589 slp_tree slp_node, slp_instance slp_node_instance)
9591 vec_info *vinfo = stmt_info->vinfo;
9592 bool is_store = false;
9593 stmt_vec_info vec_stmt = NULL;
9594 bool done;
9596 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9597 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
9599 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9600 && nested_in_vect_loop_p
9601 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9602 stmt_info));
9604 gimple *stmt = stmt_info->stmt;
9605 switch (STMT_VINFO_TYPE (stmt_info))
9607 case type_demotion_vec_info_type:
9608 case type_promotion_vec_info_type:
9609 case type_conversion_vec_info_type:
9610 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
9611 NULL);
9612 gcc_assert (done);
9613 break;
9615 case induc_vec_info_type:
9616 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
9617 NULL);
9618 gcc_assert (done);
9619 break;
9621 case shift_vec_info_type:
9622 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9623 gcc_assert (done);
9624 break;
9626 case op_vec_info_type:
9627 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
9628 NULL);
9629 gcc_assert (done);
9630 break;
9632 case assignment_vec_info_type:
9633 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
9634 NULL);
9635 gcc_assert (done);
9636 break;
9638 case load_vec_info_type:
9639 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
9640 slp_node_instance, NULL);
9641 gcc_assert (done);
9642 break;
9644 case store_vec_info_type:
9645 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9646 gcc_assert (done);
9647 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9649 /* In case of interleaving, the whole chain is vectorized when the
9650 last store in the chain is reached. Store stmts before the last
9651 one are skipped, and there vec_stmt_info shouldn't be freed
9652 meanwhile. */
9653 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9654 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9655 is_store = true;
9657 else
9658 is_store = true;
9659 break;
9661 case condition_vec_info_type:
9662 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
9663 slp_node, NULL);
9664 gcc_assert (done);
9665 break;
9667 case comparison_vec_info_type:
9668 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
9669 slp_node, NULL);
9670 gcc_assert (done);
9671 break;
9673 case call_vec_info_type:
9674 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9675 stmt = gsi_stmt (*gsi);
9676 break;
9678 case call_simd_clone_vec_info_type:
9679 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
9680 slp_node, NULL);
9681 stmt = gsi_stmt (*gsi);
9682 break;
9684 case reduc_vec_info_type:
9685 done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
9686 slp_node_instance, NULL);
9687 gcc_assert (done);
9688 break;
9690 default:
9691 if (!STMT_VINFO_LIVE_P (stmt_info))
9693 if (dump_enabled_p ())
9694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9695 "stmt not supported.\n");
9696 gcc_unreachable ();
9700 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9701 This would break hybrid SLP vectorization. */
9702 if (slp_node)
9703 gcc_assert (!vec_stmt
9704 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
9706 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9707 is being vectorized, but outside the immediately enclosing loop. */
9708 if (vec_stmt
9709 && nested_p
9710 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9711 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9712 || STMT_VINFO_RELEVANT (stmt_info) ==
9713 vect_used_in_outer_by_reduction))
9715 struct loop *innerloop = LOOP_VINFO_LOOP (
9716 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9717 imm_use_iterator imm_iter;
9718 use_operand_p use_p;
9719 tree scalar_dest;
9721 if (dump_enabled_p ())
9722 dump_printf_loc (MSG_NOTE, vect_location,
9723 "Record the vdef for outer-loop vectorization.\n");
9725 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
9726 (to be used when vectorizing outer-loop stmts that use the DEF of
9727 STMT). */
9728 if (gimple_code (stmt) == GIMPLE_PHI)
9729 scalar_dest = PHI_RESULT (stmt);
9730 else
9731 scalar_dest = gimple_get_lhs (stmt);
9733 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9734 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9736 stmt_vec_info exit_phi_info
9737 = vinfo->lookup_stmt (USE_STMT (use_p));
9738 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9742 /* Handle stmts whose DEF is used outside the loop-nest that is
9743 being vectorized. */
9744 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9746 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
9747 NULL);
9748 gcc_assert (done);
9751 if (vec_stmt)
9752 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9754 return is_store;
9758 /* Remove a group of stores (for SLP or interleaving), free their
9759 stmt_vec_info. */
9761 void
9762 vect_remove_stores (stmt_vec_info first_stmt_info)
9764 vec_info *vinfo = first_stmt_info->vinfo;
9765 stmt_vec_info next_stmt_info = first_stmt_info;
9767 while (next_stmt_info)
9769 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9770 next_stmt_info = vect_orig_stmt (next_stmt_info);
9771 /* Free the attached stmt_vec_info and remove the stmt. */
9772 vinfo->remove_stmt (next_stmt_info);
9773 next_stmt_info = tmp;
9777 /* Function get_vectype_for_scalar_type_and_size.
9779 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9780 by the target. */
9782 tree
9783 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9785 tree orig_scalar_type = scalar_type;
9786 scalar_mode inner_mode;
9787 machine_mode simd_mode;
9788 poly_uint64 nunits;
9789 tree vectype;
9791 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9792 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9793 return NULL_TREE;
9795 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9797 /* For vector types of elements whose mode precision doesn't
9798 match their types precision we use a element type of mode
9799 precision. The vectorization routines will have to make sure
9800 they support the proper result truncation/extension.
9801 We also make sure to build vector types with INTEGER_TYPE
9802 component type only. */
9803 if (INTEGRAL_TYPE_P (scalar_type)
9804 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9805 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9806 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9807 TYPE_UNSIGNED (scalar_type));
9809 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9810 When the component mode passes the above test simply use a type
9811 corresponding to that mode. The theory is that any use that
9812 would cause problems with this will disable vectorization anyway. */
9813 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9814 && !INTEGRAL_TYPE_P (scalar_type))
9815 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9817 /* We can't build a vector type of elements with alignment bigger than
9818 their size. */
9819 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9820 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9821 TYPE_UNSIGNED (scalar_type));
9823 /* If we felt back to using the mode fail if there was
9824 no scalar type for it. */
9825 if (scalar_type == NULL_TREE)
9826 return NULL_TREE;
9828 /* If no size was supplied use the mode the target prefers. Otherwise
9829 lookup a vector mode of the specified size. */
9830 if (known_eq (size, 0U))
9831 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9832 else if (!multiple_p (size, nbytes, &nunits)
9833 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9834 return NULL_TREE;
9835 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9836 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9837 return NULL_TREE;
9839 vectype = build_vector_type (scalar_type, nunits);
9841 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9842 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9843 return NULL_TREE;
9845 /* Re-attach the address-space qualifier if we canonicalized the scalar
9846 type. */
9847 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9848 return build_qualified_type
9849 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9851 return vectype;
9854 poly_uint64 current_vector_size;
9856 /* Function get_vectype_for_scalar_type.
9858 Returns the vector type corresponding to SCALAR_TYPE as supported
9859 by the target. */
9861 tree
9862 get_vectype_for_scalar_type (tree scalar_type)
9864 tree vectype;
9865 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9866 current_vector_size);
9867 if (vectype
9868 && known_eq (current_vector_size, 0U))
9869 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9870 return vectype;
9873 /* Function get_mask_type_for_scalar_type.
9875 Returns the mask type corresponding to a result of comparison
9876 of vectors of specified SCALAR_TYPE as supported by target. */
9878 tree
9879 get_mask_type_for_scalar_type (tree scalar_type)
9881 tree vectype = get_vectype_for_scalar_type (scalar_type);
9883 if (!vectype)
9884 return NULL;
9886 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9887 current_vector_size);
9890 /* Function get_same_sized_vectype
9892 Returns a vector type corresponding to SCALAR_TYPE of size
9893 VECTOR_TYPE if supported by the target. */
9895 tree
9896 get_same_sized_vectype (tree scalar_type, tree vector_type)
9898 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9899 return build_same_sized_truth_vector_type (vector_type);
9901 return get_vectype_for_scalar_type_and_size
9902 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9905 /* Function vect_is_simple_use.
9907 Input:
9908 VINFO - the vect info of the loop or basic block that is being vectorized.
9909 OPERAND - operand in the loop or bb.
9910 Output:
9911 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
9912 case OPERAND is an SSA_NAME that is defined in the vectorizable region
9913 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
9914 the definition could be anywhere in the function
9915 DT - the type of definition
9917 Returns whether a stmt with OPERAND can be vectorized.
9918 For loops, supportable operands are constants, loop invariants, and operands
9919 that are defined by the current iteration of the loop. Unsupportable
9920 operands are those that are defined by a previous iteration of the loop (as
9921 is the case in reduction/induction computations).
9922 For basic blocks, supportable operands are constants and bb invariants.
9923 For now, operands defined outside the basic block are not supported. */
9925 bool
9926 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
9927 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
9929 if (def_stmt_info_out)
9930 *def_stmt_info_out = NULL;
9931 if (def_stmt_out)
9932 *def_stmt_out = NULL;
9933 *dt = vect_unknown_def_type;
9935 if (dump_enabled_p ())
9937 dump_printf_loc (MSG_NOTE, vect_location,
9938 "vect_is_simple_use: operand ");
9939 if (TREE_CODE (operand) == SSA_NAME
9940 && !SSA_NAME_IS_DEFAULT_DEF (operand))
9941 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
9942 else
9943 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9946 if (CONSTANT_CLASS_P (operand))
9947 *dt = vect_constant_def;
9948 else if (is_gimple_min_invariant (operand))
9949 *dt = vect_external_def;
9950 else if (TREE_CODE (operand) != SSA_NAME)
9951 *dt = vect_unknown_def_type;
9952 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
9953 *dt = vect_external_def;
9954 else
9956 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
9957 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
9958 if (!stmt_vinfo)
9959 *dt = vect_external_def;
9960 else
9962 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
9963 def_stmt = stmt_vinfo->stmt;
9964 switch (gimple_code (def_stmt))
9966 case GIMPLE_PHI:
9967 case GIMPLE_ASSIGN:
9968 case GIMPLE_CALL:
9969 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9970 break;
9971 default:
9972 *dt = vect_unknown_def_type;
9973 break;
9975 if (def_stmt_info_out)
9976 *def_stmt_info_out = stmt_vinfo;
9978 if (def_stmt_out)
9979 *def_stmt_out = def_stmt;
9982 if (dump_enabled_p ())
9984 dump_printf (MSG_NOTE, ", type of def: ");
9985 switch (*dt)
9987 case vect_uninitialized_def:
9988 dump_printf (MSG_NOTE, "uninitialized\n");
9989 break;
9990 case vect_constant_def:
9991 dump_printf (MSG_NOTE, "constant\n");
9992 break;
9993 case vect_external_def:
9994 dump_printf (MSG_NOTE, "external\n");
9995 break;
9996 case vect_internal_def:
9997 dump_printf (MSG_NOTE, "internal\n");
9998 break;
9999 case vect_induction_def:
10000 dump_printf (MSG_NOTE, "induction\n");
10001 break;
10002 case vect_reduction_def:
10003 dump_printf (MSG_NOTE, "reduction\n");
10004 break;
10005 case vect_double_reduction_def:
10006 dump_printf (MSG_NOTE, "double reduction\n");
10007 break;
10008 case vect_nested_cycle:
10009 dump_printf (MSG_NOTE, "nested cycle\n");
10010 break;
10011 case vect_unknown_def_type:
10012 dump_printf (MSG_NOTE, "unknown\n");
10013 break;
10017 if (*dt == vect_unknown_def_type)
10019 if (dump_enabled_p ())
10020 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10021 "Unsupported pattern.\n");
10022 return false;
10025 return true;
10028 /* Function vect_is_simple_use.
10030 Same as vect_is_simple_use but also determines the vector operand
10031 type of OPERAND and stores it to *VECTYPE. If the definition of
10032 OPERAND is vect_uninitialized_def, vect_constant_def or
10033 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10034 is responsible to compute the best suited vector type for the
10035 scalar operand. */
10037 bool
10038 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10039 tree *vectype, stmt_vec_info *def_stmt_info_out,
10040 gimple **def_stmt_out)
10042 stmt_vec_info def_stmt_info;
10043 gimple *def_stmt;
10044 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10045 return false;
10047 if (def_stmt_out)
10048 *def_stmt_out = def_stmt;
10049 if (def_stmt_info_out)
10050 *def_stmt_info_out = def_stmt_info;
10052 /* Now get a vector type if the def is internal, otherwise supply
10053 NULL_TREE and leave it up to the caller to figure out a proper
10054 type for the use stmt. */
10055 if (*dt == vect_internal_def
10056 || *dt == vect_induction_def
10057 || *dt == vect_reduction_def
10058 || *dt == vect_double_reduction_def
10059 || *dt == vect_nested_cycle)
10061 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
10062 gcc_assert (*vectype != NULL_TREE);
10063 if (dump_enabled_p ())
10064 dump_printf_loc (MSG_NOTE, vect_location,
10065 "vect_is_simple_use: vectype %T\n", *vectype);
10067 else if (*dt == vect_uninitialized_def
10068 || *dt == vect_constant_def
10069 || *dt == vect_external_def)
10070 *vectype = NULL_TREE;
10071 else
10072 gcc_unreachable ();
10074 return true;
10078 /* Function supportable_widening_operation
10080 Check whether an operation represented by the code CODE is a
10081 widening operation that is supported by the target platform in
10082 vector form (i.e., when operating on arguments of type VECTYPE_IN
10083 producing a result of type VECTYPE_OUT).
10085 Widening operations we currently support are NOP (CONVERT), FLOAT,
10086 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10087 are supported by the target platform either directly (via vector
10088 tree-codes), or via target builtins.
10090 Output:
10091 - CODE1 and CODE2 are codes of vector operations to be used when
10092 vectorizing the operation, if available.
10093 - MULTI_STEP_CVT determines the number of required intermediate steps in
10094 case of multi-step conversion (like char->short->int - in that case
10095 MULTI_STEP_CVT will be 1).
10096 - INTERM_TYPES contains the intermediate type required to perform the
10097 widening operation (short in the above example). */
10099 bool
10100 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
10101 tree vectype_out, tree vectype_in,
10102 enum tree_code *code1, enum tree_code *code2,
10103 int *multi_step_cvt,
10104 vec<tree> *interm_types)
10106 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10107 struct loop *vect_loop = NULL;
10108 machine_mode vec_mode;
10109 enum insn_code icode1, icode2;
10110 optab optab1, optab2;
10111 tree vectype = vectype_in;
10112 tree wide_vectype = vectype_out;
10113 enum tree_code c1, c2;
10114 int i;
10115 tree prev_type, intermediate_type;
10116 machine_mode intermediate_mode, prev_mode;
10117 optab optab3, optab4;
10119 *multi_step_cvt = 0;
10120 if (loop_info)
10121 vect_loop = LOOP_VINFO_LOOP (loop_info);
10123 switch (code)
10125 case WIDEN_MULT_EXPR:
10126 /* The result of a vectorized widening operation usually requires
10127 two vectors (because the widened results do not fit into one vector).
10128 The generated vector results would normally be expected to be
10129 generated in the same order as in the original scalar computation,
10130 i.e. if 8 results are generated in each vector iteration, they are
10131 to be organized as follows:
10132 vect1: [res1,res2,res3,res4],
10133 vect2: [res5,res6,res7,res8].
10135 However, in the special case that the result of the widening
10136 operation is used in a reduction computation only, the order doesn't
10137 matter (because when vectorizing a reduction we change the order of
10138 the computation). Some targets can take advantage of this and
10139 generate more efficient code. For example, targets like Altivec,
10140 that support widen_mult using a sequence of {mult_even,mult_odd}
10141 generate the following vectors:
10142 vect1: [res1,res3,res5,res7],
10143 vect2: [res2,res4,res6,res8].
10145 When vectorizing outer-loops, we execute the inner-loop sequentially
10146 (each vectorized inner-loop iteration contributes to VF outer-loop
10147 iterations in parallel). We therefore don't allow to change the
10148 order of the computation in the inner-loop during outer-loop
10149 vectorization. */
10150 /* TODO: Another case in which order doesn't *really* matter is when we
10151 widen and then contract again, e.g. (short)((int)x * y >> 8).
10152 Normally, pack_trunc performs an even/odd permute, whereas the
10153 repack from an even/odd expansion would be an interleave, which
10154 would be significantly simpler for e.g. AVX2. */
10155 /* In any case, in order to avoid duplicating the code below, recurse
10156 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10157 are properly set up for the caller. If we fail, we'll continue with
10158 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10159 if (vect_loop
10160 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10161 && !nested_in_vect_loop_p (vect_loop, stmt_info)
10162 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10163 stmt_info, vectype_out,
10164 vectype_in, code1, code2,
10165 multi_step_cvt, interm_types))
10167 /* Elements in a vector with vect_used_by_reduction property cannot
10168 be reordered if the use chain with this property does not have the
10169 same operation. One such an example is s += a * b, where elements
10170 in a and b cannot be reordered. Here we check if the vector defined
10171 by STMT is only directly used in the reduction statement. */
10172 tree lhs = gimple_assign_lhs (stmt_info->stmt);
10173 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10174 if (use_stmt_info
10175 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10176 return true;
10178 c1 = VEC_WIDEN_MULT_LO_EXPR;
10179 c2 = VEC_WIDEN_MULT_HI_EXPR;
10180 break;
10182 case DOT_PROD_EXPR:
10183 c1 = DOT_PROD_EXPR;
10184 c2 = DOT_PROD_EXPR;
10185 break;
10187 case SAD_EXPR:
10188 c1 = SAD_EXPR;
10189 c2 = SAD_EXPR;
10190 break;
10192 case VEC_WIDEN_MULT_EVEN_EXPR:
10193 /* Support the recursion induced just above. */
10194 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10195 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10196 break;
10198 case WIDEN_LSHIFT_EXPR:
10199 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10200 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10201 break;
10203 CASE_CONVERT:
10204 c1 = VEC_UNPACK_LO_EXPR;
10205 c2 = VEC_UNPACK_HI_EXPR;
10206 break;
10208 case FLOAT_EXPR:
10209 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10210 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10211 break;
10213 case FIX_TRUNC_EXPR:
10214 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10215 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10216 break;
10218 default:
10219 gcc_unreachable ();
10222 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10223 std::swap (c1, c2);
10225 if (code == FIX_TRUNC_EXPR)
10227 /* The signedness is determined from output operand. */
10228 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10229 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10231 else
10233 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10234 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10237 if (!optab1 || !optab2)
10238 return false;
10240 vec_mode = TYPE_MODE (vectype);
10241 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10242 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10243 return false;
10245 *code1 = c1;
10246 *code2 = c2;
10248 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10249 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10250 /* For scalar masks we may have different boolean
10251 vector types having the same QImode. Thus we
10252 add additional check for elements number. */
10253 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10254 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10255 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10257 /* Check if it's a multi-step conversion that can be done using intermediate
10258 types. */
10260 prev_type = vectype;
10261 prev_mode = vec_mode;
10263 if (!CONVERT_EXPR_CODE_P (code))
10264 return false;
10266 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10267 intermediate steps in promotion sequence. We try
10268 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10269 not. */
10270 interm_types->create (MAX_INTERM_CVT_STEPS);
10271 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10273 intermediate_mode = insn_data[icode1].operand[0].mode;
10274 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10276 intermediate_type = vect_halve_mask_nunits (prev_type);
10277 if (intermediate_mode != TYPE_MODE (intermediate_type))
10278 return false;
10280 else
10281 intermediate_type
10282 = lang_hooks.types.type_for_mode (intermediate_mode,
10283 TYPE_UNSIGNED (prev_type));
10285 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10286 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10288 if (!optab3 || !optab4
10289 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10290 || insn_data[icode1].operand[0].mode != intermediate_mode
10291 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10292 || insn_data[icode2].operand[0].mode != intermediate_mode
10293 || ((icode1 = optab_handler (optab3, intermediate_mode))
10294 == CODE_FOR_nothing)
10295 || ((icode2 = optab_handler (optab4, intermediate_mode))
10296 == CODE_FOR_nothing))
10297 break;
10299 interm_types->quick_push (intermediate_type);
10300 (*multi_step_cvt)++;
10302 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10303 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10304 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10305 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10306 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10308 prev_type = intermediate_type;
10309 prev_mode = intermediate_mode;
10312 interm_types->release ();
10313 return false;
10317 /* Function supportable_narrowing_operation
10319 Check whether an operation represented by the code CODE is a
10320 narrowing operation that is supported by the target platform in
10321 vector form (i.e., when operating on arguments of type VECTYPE_IN
10322 and producing a result of type VECTYPE_OUT).
10324 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10325 and FLOAT. This function checks if these operations are supported by
10326 the target platform directly via vector tree-codes.
10328 Output:
10329 - CODE1 is the code of a vector operation to be used when
10330 vectorizing the operation, if available.
10331 - MULTI_STEP_CVT determines the number of required intermediate steps in
10332 case of multi-step conversion (like int->short->char - in that case
10333 MULTI_STEP_CVT will be 1).
10334 - INTERM_TYPES contains the intermediate type required to perform the
10335 narrowing operation (short in the above example). */
10337 bool
10338 supportable_narrowing_operation (enum tree_code code,
10339 tree vectype_out, tree vectype_in,
10340 enum tree_code *code1, int *multi_step_cvt,
10341 vec<tree> *interm_types)
10343 machine_mode vec_mode;
10344 enum insn_code icode1;
10345 optab optab1, interm_optab;
10346 tree vectype = vectype_in;
10347 tree narrow_vectype = vectype_out;
10348 enum tree_code c1;
10349 tree intermediate_type, prev_type;
10350 machine_mode intermediate_mode, prev_mode;
10351 int i;
10352 bool uns;
10354 *multi_step_cvt = 0;
10355 switch (code)
10357 CASE_CONVERT:
10358 c1 = VEC_PACK_TRUNC_EXPR;
10359 break;
10361 case FIX_TRUNC_EXPR:
10362 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10363 break;
10365 case FLOAT_EXPR:
10366 c1 = VEC_PACK_FLOAT_EXPR;
10367 break;
10369 default:
10370 gcc_unreachable ();
10373 if (code == FIX_TRUNC_EXPR)
10374 /* The signedness is determined from output operand. */
10375 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10376 else
10377 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10379 if (!optab1)
10380 return false;
10382 vec_mode = TYPE_MODE (vectype);
10383 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10384 return false;
10386 *code1 = c1;
10388 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10389 /* For scalar masks we may have different boolean
10390 vector types having the same QImode. Thus we
10391 add additional check for elements number. */
10392 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10393 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10394 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10396 if (code == FLOAT_EXPR)
10397 return false;
10399 /* Check if it's a multi-step conversion that can be done using intermediate
10400 types. */
10401 prev_mode = vec_mode;
10402 prev_type = vectype;
10403 if (code == FIX_TRUNC_EXPR)
10404 uns = TYPE_UNSIGNED (vectype_out);
10405 else
10406 uns = TYPE_UNSIGNED (vectype);
10408 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10409 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10410 costly than signed. */
10411 if (code == FIX_TRUNC_EXPR && uns)
10413 enum insn_code icode2;
10415 intermediate_type
10416 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10417 interm_optab
10418 = optab_for_tree_code (c1, intermediate_type, optab_default);
10419 if (interm_optab != unknown_optab
10420 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10421 && insn_data[icode1].operand[0].mode
10422 == insn_data[icode2].operand[0].mode)
10424 uns = false;
10425 optab1 = interm_optab;
10426 icode1 = icode2;
10430 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10431 intermediate steps in promotion sequence. We try
10432 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10433 interm_types->create (MAX_INTERM_CVT_STEPS);
10434 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10436 intermediate_mode = insn_data[icode1].operand[0].mode;
10437 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10439 intermediate_type = vect_double_mask_nunits (prev_type);
10440 if (intermediate_mode != TYPE_MODE (intermediate_type))
10441 return false;
10443 else
10444 intermediate_type
10445 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10446 interm_optab
10447 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10448 optab_default);
10449 if (!interm_optab
10450 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10451 || insn_data[icode1].operand[0].mode != intermediate_mode
10452 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10453 == CODE_FOR_nothing))
10454 break;
10456 interm_types->quick_push (intermediate_type);
10457 (*multi_step_cvt)++;
10459 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10460 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10461 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10462 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10464 prev_mode = intermediate_mode;
10465 prev_type = intermediate_type;
10466 optab1 = interm_optab;
10469 interm_types->release ();
10470 return false;
10473 /* Generate and return a statement that sets vector mask MASK such that
10474 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10476 gcall *
10477 vect_gen_while (tree mask, tree start_index, tree end_index)
10479 tree cmp_type = TREE_TYPE (start_index);
10480 tree mask_type = TREE_TYPE (mask);
10481 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10482 cmp_type, mask_type,
10483 OPTIMIZE_FOR_SPEED));
10484 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10485 start_index, end_index,
10486 build_zero_cst (mask_type));
10487 gimple_call_set_lhs (call, mask);
10488 return call;
10491 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10492 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10494 tree
10495 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10496 tree end_index)
10498 tree tmp = make_ssa_name (mask_type);
10499 gcall *call = vect_gen_while (tmp, start_index, end_index);
10500 gimple_seq_add_stmt (seq, call);
10501 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10504 /* Try to compute the vector types required to vectorize STMT_INFO,
10505 returning true on success and false if vectorization isn't possible.
10507 On success:
10509 - Set *STMT_VECTYPE_OUT to:
10510 - NULL_TREE if the statement doesn't need to be vectorized;
10511 - boolean_type_node if the statement is a boolean operation whose
10512 vector type can only be determined once all the other vector types
10513 are known; and
10514 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10516 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10517 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10518 statement does not help to determine the overall number of units. */
10520 opt_result
10521 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10522 tree *stmt_vectype_out,
10523 tree *nunits_vectype_out)
10525 gimple *stmt = stmt_info->stmt;
10527 *stmt_vectype_out = NULL_TREE;
10528 *nunits_vectype_out = NULL_TREE;
10530 if (gimple_get_lhs (stmt) == NULL_TREE
10531 /* MASK_STORE has no lhs, but is ok. */
10532 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10534 if (is_a <gcall *> (stmt))
10536 /* Ignore calls with no lhs. These must be calls to
10537 #pragma omp simd functions, and what vectorization factor
10538 it really needs can't be determined until
10539 vectorizable_simd_clone_call. */
10540 if (dump_enabled_p ())
10541 dump_printf_loc (MSG_NOTE, vect_location,
10542 "defer to SIMD clone analysis.\n");
10543 return opt_result::success ();
10546 return opt_result::failure_at (stmt,
10547 "not vectorized: irregular stmt.%G", stmt);
10550 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10551 return opt_result::failure_at (stmt,
10552 "not vectorized: vector stmt in loop:%G",
10553 stmt);
10555 tree vectype;
10556 tree scalar_type = NULL_TREE;
10557 if (STMT_VINFO_VECTYPE (stmt_info))
10558 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10559 else
10561 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10562 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10563 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10564 else
10565 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10567 /* Pure bool ops don't participate in number-of-units computation.
10568 For comparisons use the types being compared. */
10569 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10570 && is_gimple_assign (stmt)
10571 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10573 *stmt_vectype_out = boolean_type_node;
10575 tree rhs1 = gimple_assign_rhs1 (stmt);
10576 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10577 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10578 scalar_type = TREE_TYPE (rhs1);
10579 else
10581 if (dump_enabled_p ())
10582 dump_printf_loc (MSG_NOTE, vect_location,
10583 "pure bool operation.\n");
10584 return opt_result::success ();
10588 if (dump_enabled_p ())
10589 dump_printf_loc (MSG_NOTE, vect_location,
10590 "get vectype for scalar type: %T\n", scalar_type);
10591 vectype = get_vectype_for_scalar_type (scalar_type);
10592 if (!vectype)
10593 return opt_result::failure_at (stmt,
10594 "not vectorized:"
10595 " unsupported data-type %T\n",
10596 scalar_type);
10598 if (!*stmt_vectype_out)
10599 *stmt_vectype_out = vectype;
10601 if (dump_enabled_p ())
10602 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
10605 /* Don't try to compute scalar types if the stmt produces a boolean
10606 vector; use the existing vector type instead. */
10607 tree nunits_vectype;
10608 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10609 nunits_vectype = vectype;
10610 else
10612 /* The number of units is set according to the smallest scalar
10613 type (or the largest vector size, but we only support one
10614 vector size per vectorization). */
10615 if (*stmt_vectype_out != boolean_type_node)
10617 HOST_WIDE_INT dummy;
10618 scalar_type = vect_get_smallest_scalar_type (stmt_info,
10619 &dummy, &dummy);
10621 if (dump_enabled_p ())
10622 dump_printf_loc (MSG_NOTE, vect_location,
10623 "get vectype for scalar type: %T\n", scalar_type);
10624 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10626 if (!nunits_vectype)
10627 return opt_result::failure_at (stmt,
10628 "not vectorized: unsupported data-type %T\n",
10629 scalar_type);
10631 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10632 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10633 return opt_result::failure_at (stmt,
10634 "not vectorized: different sized vector "
10635 "types in statement, %T and %T\n",
10636 vectype, nunits_vectype);
10638 if (dump_enabled_p ())
10640 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
10641 nunits_vectype);
10643 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10644 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10645 dump_printf (MSG_NOTE, "\n");
10648 *nunits_vectype_out = nunits_vectype;
10649 return opt_result::success ();
10652 /* Try to determine the correct vector type for STMT_INFO, which is a
10653 statement that produces a scalar boolean result. Return the vector
10654 type on success, otherwise return NULL_TREE. */
10656 opt_tree
10657 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10659 gimple *stmt = stmt_info->stmt;
10660 tree mask_type = NULL;
10661 tree vectype, scalar_type;
10663 if (is_gimple_assign (stmt)
10664 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10665 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10667 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10668 mask_type = get_mask_type_for_scalar_type (scalar_type);
10670 if (!mask_type)
10671 return opt_tree::failure_at (stmt,
10672 "not vectorized: unsupported mask\n");
10674 else
10676 tree rhs;
10677 ssa_op_iter iter;
10678 enum vect_def_type dt;
10680 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10682 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10683 return opt_tree::failure_at (stmt,
10684 "not vectorized:can't compute mask"
10685 " type for statement, %G", stmt);
10687 /* No vectype probably means external definition.
10688 Allow it in case there is another operand which
10689 allows to determine mask type. */
10690 if (!vectype)
10691 continue;
10693 if (!mask_type)
10694 mask_type = vectype;
10695 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10696 TYPE_VECTOR_SUBPARTS (vectype)))
10697 return opt_tree::failure_at (stmt,
10698 "not vectorized: different sized mask"
10699 " types in statement, %T and %T\n",
10700 mask_type, vectype);
10701 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10702 != VECTOR_BOOLEAN_TYPE_P (vectype))
10703 return opt_tree::failure_at (stmt,
10704 "not vectorized: mixed mask and "
10705 "nonmask vector types in statement, "
10706 "%T and %T\n",
10707 mask_type, vectype);
10710 /* We may compare boolean value loaded as vector of integers.
10711 Fix mask_type in such case. */
10712 if (mask_type
10713 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10714 && gimple_code (stmt) == GIMPLE_ASSIGN
10715 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10716 mask_type = build_same_sized_truth_vector_type (mask_type);
10719 /* No mask_type should mean loop invariant predicate.
10720 This is probably a subject for optimization in if-conversion. */
10721 if (!mask_type)
10722 return opt_tree::failure_at (stmt,
10723 "not vectorized: can't compute mask type "
10724 "for statement: %G", stmt);
10726 return opt_tree::success (mask_type);