1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
101 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
102 body_cost_vec->safe_push (si);
104 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
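/* As an illustration (a minimal sketch; BODY_COST_VEC, NCOPIES and
   STMT_INFO are assumed to come from the caller), recording the cost of
   NCOPIES aligned vector stores in the loop body looks like:

     inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                      vector_store, stmt_info, 0,
                                      vect_body);

   which is how vect_get_store_cost below uses it.  */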
109 /* Return a variable of type ELEM_TYPE[NELEMS]. */
111 static tree
112 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
114 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
115 "vect_array");
118 /* ARRAY is an array of vectors created by create_vector_array.
119 Return an SSA_NAME for the vector in index N. The reference
120 is part of the vectorization of STMT_INFO and the vector is associated
121 with scalar destination SCALAR_DEST. */
123 static tree
124 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
125 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
127 tree vect_type, vect, vect_name, array_ref;
128 gimple *new_stmt;
130 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
131 vect_type = TREE_TYPE (TREE_TYPE (array));
132 vect = vect_create_destination_var (scalar_dest, vect_type);
133 array_ref = build4 (ARRAY_REF, vect_type, array,
134 build_int_cst (size_type_node, n),
135 NULL_TREE, NULL_TREE);
137 new_stmt = gimple_build_assign (vect, array_ref);
138 vect_name = make_ssa_name (vect, new_stmt);
139 gimple_assign_set_lhs (new_stmt, vect_name);
140 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
142 return vect_name;
145 /* ARRAY is an array of vectors created by create_vector_array.
146 Emit code to store SSA_NAME VECT in index N of the array.
147 The store is part of the vectorization of STMT_INFO. */
149 static void
150 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
151 tree vect, tree array, unsigned HOST_WIDE_INT n)
153 tree array_ref;
154 gimple *new_stmt;
156 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
157 build_int_cst (size_type_node, n),
158 NULL_TREE, NULL_TREE);
160 new_stmt = gimple_build_assign (array_ref, vect);
161 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
164 /* PTR is a pointer to an array of type TYPE. Return a representation
 165    of *PTR, using ALIAS_PTR_TYPE as the TBAA pointer type of the access.
 166    The memory reference replaces those of the original (grouped) data reference.  */
168 static tree
169 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
171 tree mem_ref;
173 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
174 /* Arrays have the same alignment as their type. */
175 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
176 return mem_ref;
179 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
180 Emit the clobber before *GSI. */
182 static void
183 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
184 tree var)
186 tree clobber = build_clobber (TREE_TYPE (var));
187 gimple *new_stmt = gimple_build_assign (var, clobber);
188 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193 /* Function vect_mark_relevant.
195 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
197 static void
198 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
199 enum vect_relevant relevant, bool live_p)
201 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
202 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
204 if (dump_enabled_p ())
205 dump_printf_loc (MSG_NOTE, vect_location,
206 "mark relevant %d, live %d: %G", relevant, live_p,
207 stmt_info->stmt);
209 /* If this stmt is an original stmt in a pattern, we might need to mark its
210 related pattern stmt instead of the original stmt. However, such stmts
211 may have their own uses that are not in any pattern, in such cases the
212 stmt itself should be marked. */
213 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
215 /* This is the last stmt in a sequence that was detected as a
216 pattern that can potentially be vectorized. Don't mark the stmt
217 as relevant/live because it's not going to be vectorized.
218 Instead mark the pattern-stmt that replaces it. */
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE, vect_location,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_vec_info old_stmt_info = stmt_info;
225 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
226 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
227 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
228 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
231 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233 STMT_VINFO_RELEVANT (stmt_info) = relevant;
235 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE, vect_location,
240 "already marked relevant/live.\n");
241 return;
244 worklist->safe_push (stmt_info);
248 /* Function is_simple_and_all_uses_invariant
250 Return true if STMT_INFO is simple and all uses of it are invariant. */
252 bool
253 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
254 loop_vec_info loop_vinfo)
256 tree op;
257 ssa_op_iter iter;
259 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
260 if (!stmt)
261 return false;
263 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
265 enum vect_def_type dt = vect_uninitialized_def;
267 if (!vect_is_simple_use (op, loop_vinfo, &dt))
269 if (dump_enabled_p ())
270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
271 "use not simple.\n");
272 return false;
275 if (dt != vect_external_def && dt != vect_constant_def)
276 return false;
278 return true;
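/* For instance (an illustrative statement; n_2 is assumed to be defined
   outside the loop): for 'x_1 = n_2 * 4' the only use, n_2, is a loop
   invariant (vect_external_def), so the function returns true.  */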
281 /* Function vect_stmt_relevant_p.
283 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
284 is "relevant for vectorization".
286 A stmt is considered "relevant for vectorization" if:
287 - it has uses outside the loop.
288 - it has vdefs (it alters memory).
 289    - it is a control stmt in the loop (except for the exit condition).
291 CHECKME: what other side effects would the vectorizer allow? */
293 static bool
294 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
295 enum vect_relevant *relevant, bool *live_p)
297 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
298 ssa_op_iter op_iter;
299 imm_use_iterator imm_iter;
300 use_operand_p use_p;
301 def_operand_p def_p;
303 *relevant = vect_unused_in_scope;
304 *live_p = false;
306 /* cond stmt other than loop exit cond. */
307 if (is_ctrl_stmt (stmt_info->stmt)
308 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
309 *relevant = vect_used_in_scope;
311 /* changing memory. */
312 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
313 if (gimple_vdef (stmt_info->stmt)
314 && !gimple_clobber_p (stmt_info->stmt))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE, vect_location,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant = vect_used_in_scope;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
342 *live_p = true;
347 if (*live_p && *relevant == vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE, vect_location,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant = vect_used_only_live;
356 return (*live_p || *relevant);
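/* As an illustration of the criteria above (a made-up scalar loop, not
   taken from any testcase):

     for (i = 0; i < n; i++)
       {
         tmp = a[i] + b[i];   # no vdef, no use outside the loop
         c[i] = tmp;          # has a vdef (stores to memory): relevant
       }

   only the store to c[i] is directly relevant here; the computation of
   tmp becomes relevant later, when process_use follows the uses of the
   store.  */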
360 /* Function exist_non_indexing_operands_for_use_p
362 USE is one of the uses attached to STMT_INFO. Check if USE is
363 used in STMT_INFO for anything other than indexing an array. */
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
368 tree operand;
370 /* USE corresponds to some operand in STMT. If there is no data
371 reference in STMT, then any operand that corresponds to USE
372 is not indexing an array. */
373 if (!STMT_VINFO_DATA_REF (stmt_info))
374 return true;
 376   /* STMT has a data_ref. FORNOW this means that it's of one of
377 the following forms:
378 -1- ARRAY_REF = var
379 -2- var = ARRAY_REF
380 (This should have been verified in analyze_data_refs).
382 'var' in the second case corresponds to a def, not a use,
383 so USE cannot correspond to any operands that are not used
384 for array indexing.
386 Therefore, all we need to check is if STMT falls into the
387 first case, and whether var corresponds to USE. */
389 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
390 if (!assign || !gimple_assign_copy_p (assign))
392 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
393 if (call && gimple_call_internal_p (call))
395 internal_fn ifn = gimple_call_internal_fn (call);
396 int mask_index = internal_fn_mask_index (ifn);
397 if (mask_index >= 0
398 && use == gimple_call_arg (call, mask_index))
399 return true;
400 int stored_value_index = internal_fn_stored_value_index (ifn);
401 if (stored_value_index >= 0
402 && use == gimple_call_arg (call, stored_value_index))
403 return true;
404 if (internal_gather_scatter_fn_p (ifn)
405 && use == gimple_call_arg (call, 1))
406 return true;
408 return false;
411 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
412 return false;
413 operand = gimple_assign_rhs1 (assign);
414 if (TREE_CODE (operand) != SSA_NAME)
415 return false;
417 if (operand == use)
418 return true;
420 return false;
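/* For example (an illustrative statement; the SSA names are made up):
   for the store 'a[i_1] = x_2' the function returns true for USE == x_2,
   because x_2 is the stored value, but false for USE == i_1, which is
   only used to index the array.  */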
425 Function process_use.
427 Inputs:
428 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
429 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430 that defined USE. This is done by calling mark_relevant and passing it
431 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
433 be performed.
435 Outputs:
436 Generally, LIVE_P and RELEVANT are used to define the liveness and
437 relevance info of the DEF_STMT of this USE:
438 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
439 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
440 Exceptions:
441 - case 1: If USE is used only for address computations (e.g. array indexing),
442 which does not need to be directly vectorized, then the liveness/relevance
443 of the respective DEF_STMT is left unchanged.
444 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
 445    we skip DEF_STMT because it has already been processed.
446 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
447 "relevant" will be modified accordingly.
449 Return true if everything is as expected. Return false otherwise. */
451 static opt_result
452 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
453 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
454 bool force)
456 stmt_vec_info dstmt_vinfo;
457 basic_block bb, def_bb;
458 enum vect_def_type dt;
460 /* case 1: we are only interested in uses that need to be vectorized. Uses
461 that are used for address computation are not considered relevant. */
462 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
463 return opt_result::success ();
465 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
466 return opt_result::failure_at (stmt_vinfo->stmt,
467 "not vectorized:"
468 " unsupported use in stmt.\n");
470 if (!dstmt_vinfo)
471 return opt_result::success ();
473 def_bb = gimple_bb (dstmt_vinfo->stmt);
475 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
476 DSTMT_VINFO must have already been processed, because this should be the
477 only way that STMT, which is a reduction-phi, was put in the worklist,
478 as there should be no other uses for DSTMT_VINFO in the loop. So we just
479 check that everything is as expected, and we are done. */
480 bb = gimple_bb (stmt_vinfo->stmt);
481 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
483 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
485 && bb->loop_father == def_bb->loop_father)
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
490 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
491 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
492 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
 602    Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
610 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location,
642 "init: stmt relevant? %G", stmt_info->stmt);
644 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
652 use_operand_p use_p;
653 ssa_op_iter iter;
655 stmt_vec_info stmt_vinfo = worklist.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location,
658 "worklist: examine stmt: %G", stmt_vinfo->stmt);
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
662 of STMT. */
663 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
678 case vect_reduction_def:
679 gcc_assert (relevant != vect_unused_in_scope);
680 if (relevant != vect_unused_in_scope
681 && relevant != vect_used_in_scope
682 && relevant != vect_used_by_reduction
683 && relevant != vect_used_only_live)
684 return opt_result::failure_at
685 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
686 break;
688 case vect_nested_cycle:
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_outer_by_reduction
691 && relevant != vect_used_in_outer)
692 return opt_result::failure_at
693 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
694 break;
696 case vect_double_reduction_def:
697 if (relevant != vect_unused_in_scope
698 && relevant != vect_used_by_reduction
699 && relevant != vect_used_only_live)
700 return opt_result::failure_at
701 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
702 break;
704 default:
705 break;
708 if (is_pattern_stmt_p (stmt_vinfo))
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
715 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
716 tree op = gimple_assign_rhs1 (assign);
718 i = 1;
719 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
721 opt_result res
722 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
723 loop_vinfo, relevant, &worklist, false);
724 if (!res)
725 return res;
726 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
727 loop_vinfo, relevant, &worklist, false);
728 if (!res)
729 return res;
730 i = 2;
732 for (; i < gimple_num_ops (assign); i++)
734 op = gimple_op (assign, i);
735 if (TREE_CODE (op) == SSA_NAME)
737 opt_result res
738 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
739 &worklist, false);
740 if (!res)
741 return res;
745 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
747 for (i = 0; i < gimple_call_num_args (call); i++)
749 tree arg = gimple_call_arg (call, i);
750 opt_result res
751 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
752 &worklist, false);
753 if (!res)
754 return res;
758 else
759 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
761 tree op = USE_FROM_PTR (use_p);
762 opt_result res
763 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
764 &worklist, false);
765 if (!res)
766 return res;
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
771 gather_scatter_info gs_info;
772 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
773 gcc_unreachable ();
774 opt_result res
775 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
776 &worklist, true);
777 if (!res)
778 return res;
780 } /* while worklist */
782 return opt_result::success ();
785 /* Compute the prologue cost for invariant or constant operands. */
787 static unsigned
788 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
789 unsigned opno, enum vect_def_type dt,
790 stmt_vector_for_cost *cost_vec)
792 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
793 tree op = gimple_op (stmt, opno);
794 unsigned prologue_cost = 0;
796 /* Without looking at the actual initializer a vector of
797 constants can be implemented as load from the constant pool.
798 When all elements are the same we can use a splat. */
799 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
800 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
801 unsigned num_vects_to_check;
802 unsigned HOST_WIDE_INT const_nunits;
803 unsigned nelt_limit;
804 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
805 && ! multiple_p (const_nunits, group_size))
807 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
808 nelt_limit = const_nunits;
810 else
812 /* If either the vector has variable length or the vectors
813 are composed of repeated whole groups we only need to
814 cost construction once. All vectors will be the same. */
815 num_vects_to_check = 1;
816 nelt_limit = group_size;
818 tree elt = NULL_TREE;
819 unsigned nelt = 0;
820 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
822 unsigned si = j % group_size;
823 if (nelt == 0)
824 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
825 /* ??? We're just tracking whether all operands of a single
 826          vector initializer are the same; ideally we'd check if
827 we emitted the same one already. */
828 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
829 opno))
830 elt = NULL_TREE;
831 nelt++;
832 if (nelt == nelt_limit)
834 /* ??? We need to pass down stmt_info for a vector type
835 even if it points to the wrong stmt. */
836 prologue_cost += record_stmt_cost
837 (cost_vec, 1,
838 dt == vect_external_def
839 ? (elt ? scalar_to_vec : vec_construct)
840 : vector_load,
841 stmt_info, 0, vect_prologue);
842 nelt = 0;
846 return prologue_cost;
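/* To illustrate the cases above (made-up operands, not from a testcase):
   an invariant operand that is the same SSA name in every lane of the
   group is costed with scalar_to_vec (a splat); a mix of invariants such
   as { a_1, b_2, a_1, b_2 } is costed with vec_construct; constant
   operands are costed with vector_load (a load from the constant
   pool).  */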
849 /* Function vect_model_simple_cost.
851 Models cost for simple operations, i.e. those that only emit ncopies of a
852 single op. Right now, this does not account for multiple insns that could
853 be generated for the single vector op. We will handle that shortly. */
855 static void
856 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
857 enum vect_def_type *dt,
858 int ndts,
859 slp_tree node,
860 stmt_vector_for_cost *cost_vec)
862 int inside_cost = 0, prologue_cost = 0;
864 gcc_assert (cost_vec != NULL);
866 /* ??? Somehow we need to fix this at the callers. */
867 if (node)
868 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
870 if (node)
872 /* Scan operands and account for prologue cost of constants/externals.
873 ??? This over-estimates cost for multiple uses and should be
874 re-engineered. */
875 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
876 tree lhs = gimple_get_lhs (stmt);
877 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
879 tree op = gimple_op (stmt, i);
880 enum vect_def_type dt;
881 if (!op || op == lhs)
882 continue;
883 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
884 && (dt == vect_constant_def || dt == vect_external_def))
885 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
886 i, dt, cost_vec);
889 else
890 /* Cost the "broadcast" of a scalar operand in to a vector operand.
891 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
892 cost model. */
893 for (int i = 0; i < ndts; i++)
894 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
895 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
896 stmt_info, 0, vect_prologue);
898 /* Adjust for two-operator SLP nodes. */
899 if (node && SLP_TREE_TWO_OPERATORS (node))
901 ncopies *= 2;
902 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
903 stmt_info, 0, vect_body);
906 /* Pass the inside-of-loop statements to the target-specific cost model. */
907 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
908 stmt_info, 0, vect_body);
910 if (dump_enabled_p ())
911 dump_printf_loc (MSG_NOTE, vect_location,
912 "vect_model_simple_cost: inside_cost = %d, "
913 "prologue_cost = %d .\n", inside_cost, prologue_cost);
917 /* Model cost for type demotion and promotion operations. PWR is normally
918 zero for single-step promotions and demotions. It will be one if
919 two-step promotion/demotion is required, and so on. Each additional
920 step doubles the number of instructions required. */
922 static void
923 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
924 enum vect_def_type *dt, int pwr,
925 stmt_vector_for_cost *cost_vec)
927 int i, tmp;
928 int inside_cost = 0, prologue_cost = 0;
930 for (i = 0; i < pwr + 1; i++)
932 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
933 (i + 1) : i;
934 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
935 vec_promote_demote, stmt_info, 0,
936 vect_body);
 939   /* FORNOW: Assuming a maximum of 2 args per stmt.  */
940 for (i = 0; i < 2; i++)
941 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
942 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
943 stmt_info, 0, vect_prologue);
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE, vect_location,
947 "vect_model_promotion_demotion_cost: inside_cost = %d, "
948 "prologue_cost = %d .\n", inside_cost, prologue_cost);
951 /* Function vect_model_store_cost
953 Models cost for stores. In the case of grouped accesses, one access
954 has the overhead of the grouped access attributed to it. */
956 static void
957 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
958 enum vect_def_type dt,
959 vect_memory_access_type memory_access_type,
960 vec_load_store_type vls_type, slp_tree slp_node,
961 stmt_vector_for_cost *cost_vec)
963 unsigned int inside_cost = 0, prologue_cost = 0;
964 stmt_vec_info first_stmt_info = stmt_info;
965 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
967 /* ??? Somehow we need to fix this at the callers. */
968 if (slp_node)
969 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
971 if (vls_type == VLS_STORE_INVARIANT)
973 if (slp_node)
974 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
975 1, dt, cost_vec);
976 else
977 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
978 stmt_info, 0, vect_prologue);
981 /* Grouped stores update all elements in the group at once,
982 so we want the DR for the first statement. */
983 if (!slp_node && grouped_access_p)
984 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
986 /* True if we should include any once-per-group costs as well as
987 the cost of the statement itself. For SLP we only get called
988 once per group anyhow. */
989 bool first_stmt_p = (first_stmt_info == stmt_info);
991 /* We assume that the cost of a single store-lanes instruction is
992 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
993 access is instead being provided by a permute-and-store operation,
994 include the cost of the permutes. */
995 if (first_stmt_p
996 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 998       /* Uses high and low interleave or shuffle operations for each
999 needed permute. */
1000 int group_size = DR_GROUP_SIZE (first_stmt_info);
1001 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1002 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1003 stmt_info, 0, vect_body);
1005 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_NOTE, vect_location,
1007 "vect_model_store_cost: strided group_size = %d .\n",
1008 group_size);
1011 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1012 /* Costs of the stores. */
1013 if (memory_access_type == VMAT_ELEMENTWISE
1014 || memory_access_type == VMAT_GATHER_SCATTER)
1016 /* N scalar stores plus extracting the elements. */
1017 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1018 inside_cost += record_stmt_cost (cost_vec,
1019 ncopies * assumed_nunits,
1020 scalar_store, stmt_info, 0, vect_body);
1022 else
1023 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1025 if (memory_access_type == VMAT_ELEMENTWISE
1026 || memory_access_type == VMAT_STRIDED_SLP)
1028 /* N scalar stores plus extracting the elements. */
1029 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1030 inside_cost += record_stmt_cost (cost_vec,
1031 ncopies * assumed_nunits,
1032 vec_to_scalar, stmt_info, 0, vect_body);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_store_cost: inside_cost = %d, "
1038 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1042 /* Calculate cost of DR's memory access. */
1043 void
1044 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1045 unsigned int *inside_cost,
1046 stmt_vector_for_cost *body_cost_vec)
1048 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1049 int alignment_support_scheme
1050 = vect_supportable_dr_alignment (dr_info, false);
1052 switch (alignment_support_scheme)
1054 case dr_aligned:
1056 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1057 vector_store, stmt_info, 0,
1058 vect_body);
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_NOTE, vect_location,
1062 "vect_model_store_cost: aligned.\n");
1063 break;
1066 case dr_unaligned_supported:
1068 /* Here, we assign an additional cost for the unaligned store. */
1069 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1070 unaligned_store, stmt_info,
1071 DR_MISALIGNMENT (dr_info),
1072 vect_body);
1073 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_NOTE, vect_location,
1075 "vect_model_store_cost: unaligned supported by "
1076 "hardware.\n");
1077 break;
1080 case dr_unaligned_unsupported:
1082 *inside_cost = VECT_MAX_COST;
1084 if (dump_enabled_p ())
1085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1086 "vect_model_store_cost: unsupported access.\n");
1087 break;
1090 default:
1091 gcc_unreachable ();
1096 /* Function vect_model_load_cost
1098 Models cost for loads. In the case of grouped accesses, one access has
1099 the overhead of the grouped access attributed to it. Since unaligned
1100 accesses are supported for loads, we also account for the costs of the
1101 access scheme chosen. */
1103 static void
1104 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1105 vect_memory_access_type memory_access_type,
1106 slp_instance instance,
1107 slp_tree slp_node,
1108 stmt_vector_for_cost *cost_vec)
1110 unsigned int inside_cost = 0, prologue_cost = 0;
1111 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1113 gcc_assert (cost_vec);
1115 /* ??? Somehow we need to fix this at the callers. */
1116 if (slp_node)
1117 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1119 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1121 /* If the load is permuted then the alignment is determined by
1122 the first group element not by the first scalar stmt DR. */
1123 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1124 /* Record the cost for the permutation. */
1125 unsigned n_perms;
1126 unsigned assumed_nunits
1127 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1128 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1129 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1130 slp_vf, instance, true,
1131 &n_perms);
1132 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1133 first_stmt_info, 0, vect_body);
1134 /* And adjust the number of loads performed. This handles
1135 redundancies as well as loads that are later dead. */
1136 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1137 bitmap_clear (perm);
1138 for (unsigned i = 0;
1139 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1140 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1141 ncopies = 0;
1142 bool load_seen = false;
1143 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1145 if (i % assumed_nunits == 0)
1147 if (load_seen)
1148 ncopies++;
1149 load_seen = false;
1151 if (bitmap_bit_p (perm, i))
1152 load_seen = true;
1154 if (load_seen)
1155 ncopies++;
1156 gcc_assert (ncopies
1157 <= (DR_GROUP_SIZE (first_stmt_info)
1158 - DR_GROUP_GAP (first_stmt_info)
1159 + assumed_nunits - 1) / assumed_nunits);
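      /* For example (an illustrative permutation): with DR_GROUP_SIZE == 4,
         ASSUMED_NUNITS == 2 and load permutation { 0, 0, 1, 1 }, only group
         elements 0 and 1 are used, so only the first of the two vectors
         covering the group needs to be loaded and NCOPIES is reduced
         to 1.  */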
1162 /* Grouped loads read all elements in the group at once,
1163 so we want the DR for the first statement. */
1164 stmt_vec_info first_stmt_info = stmt_info;
1165 if (!slp_node && grouped_access_p)
1166 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1168 /* True if we should include any once-per-group costs as well as
1169 the cost of the statement itself. For SLP we only get called
1170 once per group anyhow. */
1171 bool first_stmt_p = (first_stmt_info == stmt_info);
1173 /* We assume that the cost of a single load-lanes instruction is
1174 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1175 access is instead being provided by a load-and-permute operation,
1176 include the cost of the permutes. */
1177 if (first_stmt_p
1178 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1180       /* Uses even and odd extract operations or shuffle operations
1181 for each needed permute. */
1182 int group_size = DR_GROUP_SIZE (first_stmt_info);
1183 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1184 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1185 stmt_info, 0, vect_body);
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: strided group_size = %d .\n",
1190 group_size);
1193 /* The loads themselves. */
1194 if (memory_access_type == VMAT_ELEMENTWISE
1195 || memory_access_type == VMAT_GATHER_SCATTER)
1197 /* N scalar loads plus gathering them into a vector. */
1198 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1199 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1200 inside_cost += record_stmt_cost (cost_vec,
1201 ncopies * assumed_nunits,
1202 scalar_load, stmt_info, 0, vect_body);
1204 else
1205 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1206 &inside_cost, &prologue_cost,
1207 cost_vec, cost_vec, true);
1208 if (memory_access_type == VMAT_ELEMENTWISE
1209 || memory_access_type == VMAT_STRIDED_SLP)
1210 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1211 stmt_info, 0, vect_body);
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE, vect_location,
1215 "vect_model_load_cost: inside_cost = %d, "
1216 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1220 /* Calculate cost of DR's memory access. */
1221 void
1222 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1223 bool add_realign_cost, unsigned int *inside_cost,
1224 unsigned int *prologue_cost,
1225 stmt_vector_for_cost *prologue_cost_vec,
1226 stmt_vector_for_cost *body_cost_vec,
1227 bool record_prologue_costs)
1229 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1230 int alignment_support_scheme
1231 = vect_supportable_dr_alignment (dr_info, false);
1233 switch (alignment_support_scheme)
1235 case dr_aligned:
1237 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1238 stmt_info, 0, vect_body);
1240 if (dump_enabled_p ())
1241 dump_printf_loc (MSG_NOTE, vect_location,
1242 "vect_model_load_cost: aligned.\n");
1244 break;
1246 case dr_unaligned_supported:
1248 /* Here, we assign an additional cost for the unaligned load. */
1249 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1250 unaligned_load, stmt_info,
1251 DR_MISALIGNMENT (dr_info),
1252 vect_body);
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE, vect_location,
1256 "vect_model_load_cost: unaligned supported by "
1257 "hardware.\n");
1259 break;
1261 case dr_explicit_realign:
1263 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1264 vector_load, stmt_info, 0, vect_body);
1265 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1266 vec_perm, stmt_info, 0, vect_body);
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1270 prologue costs. */
1271 if (targetm.vectorize.builtin_mask_for_load)
1272 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1273 stmt_info, 0, vect_body);
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE, vect_location,
1277 "vect_model_load_cost: explicit realign\n");
1279 break;
1281 case dr_explicit_realign_optimized:
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "vect_model_load_cost: unaligned software "
1286 "pipelined.\n");
1288 /* Unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1295 if (add_realign_cost && record_prologue_costs)
1297 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1298 vector_stmt, stmt_info,
1299 0, vect_prologue);
1300 if (targetm.vectorize.builtin_mask_for_load)
1301 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1302 vector_stmt, stmt_info,
1303 0, vect_prologue);
1306 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1307 stmt_info, 0, vect_body);
1308 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1309 stmt_info, 0, vect_body);
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE, vect_location,
1313 "vect_model_load_cost: explicit realign optimized"
1314 "\n");
1316 break;
1319 case dr_unaligned_unsupported:
1321 *inside_cost = VECT_MAX_COST;
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1325 "vect_model_load_cost: unsupported access.\n");
1326 break;
1329 default:
1330 gcc_unreachable ();
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
1337 static void
1338 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1339 gimple_stmt_iterator *gsi)
1341 if (gsi)
1342 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1343 else
1345 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1347 if (loop_vinfo)
1349 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1350 basic_block new_bb;
1351 edge pe;
1353 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1354 loop = loop->inner;
1356 pe = loop_preheader_edge (loop);
1357 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1358 gcc_assert (!new_bb);
1360 else
1362 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1363 basic_block bb;
1364 gimple_stmt_iterator gsi_bb_start;
1366 gcc_assert (bb_vinfo);
1367 bb = BB_VINFO_BB (bb_vinfo);
1368 gsi_bb_start = gsi_after_labels (bb);
1369 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1373 if (dump_enabled_p ())
1374 dump_printf_loc (MSG_NOTE, vect_location,
1375 "created new init_stmt: %G", new_stmt);
1378 /* Function vect_init_vector.
1380 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1381 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1382    a vector type, a vector with all elements equal to VAL is created first.
1383    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1384 initialization at the loop preheader.
1385 Return the DEF of INIT_STMT.
1386 It will be used in the vectorization of STMT_INFO. */
1388 tree
1389 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1390 gimple_stmt_iterator *gsi)
1392 gimple *init_stmt;
1393 tree new_temp;
1395   /* We abuse this function to push something to an SSA name with initial 'val'.  */
1396 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1398 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1399 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1401 /* Scalar boolean value should be transformed into
1402 all zeros or all ones value before building a vector. */
1403 if (VECTOR_BOOLEAN_TYPE_P (type))
1405 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1406 tree false_val = build_zero_cst (TREE_TYPE (type));
1408 if (CONSTANT_CLASS_P (val))
1409 val = integer_zerop (val) ? false_val : true_val;
1410 else
1412 new_temp = make_ssa_name (TREE_TYPE (type));
1413 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1414 val, true_val, false_val);
1415 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1416 val = new_temp;
1419 else if (CONSTANT_CLASS_P (val))
1420 val = fold_convert (TREE_TYPE (type), val);
1421 else
1423 new_temp = make_ssa_name (TREE_TYPE (type));
1424 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1425 init_stmt = gimple_build_assign (new_temp,
1426 fold_build1 (VIEW_CONVERT_EXPR,
1427 TREE_TYPE (type),
1428 val));
1429 else
1430 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1431 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1432 val = new_temp;
1435 val = build_vector_from_val (type, val);
1438 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1439 init_stmt = gimple_build_assign (new_temp, val);
1440 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1441 return new_temp;
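/* For example (an illustrative call; the V4SI vectype variable is made up):

     tree cst = vect_init_vector (stmt_info,
                                  build_int_cst (integer_type_node, 5),
                                  v4si_vectype, NULL);

   builds the invariant vector { 5, 5, 5, 5 }, inserts the init stmt on
   the loop preheader edge (because GSI is NULL) and returns the new
   SSA name.  */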
1444 /* Function vect_get_vec_def_for_operand_1.
1446 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1447 with type DT that will be used in the vectorized stmt. */
1449 tree
1450 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1451 enum vect_def_type dt)
1453 tree vec_oprnd;
1454 stmt_vec_info vec_stmt_info;
1456 switch (dt)
1458 /* operand is a constant or a loop invariant. */
1459 case vect_constant_def:
1460 case vect_external_def:
1461 /* Code should use vect_get_vec_def_for_operand. */
1462 gcc_unreachable ();
1464 /* Operand is defined by a loop header phi. In case of nested
1465 cycles we also may have uses of the backedge def. */
1466 case vect_reduction_def:
1467 case vect_double_reduction_def:
1468 case vect_nested_cycle:
1469 case vect_induction_def:
1470 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1471 || dt == vect_nested_cycle);
1472 /* Fallthru. */
1474 /* operand is defined inside the loop. */
1475 case vect_internal_def:
1477 /* Get the def from the vectorized stmt. */
1478 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1479 /* Get vectorized pattern statement. */
1480 if (!vec_stmt_info
1481 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1482 && !STMT_VINFO_RELEVANT (def_stmt_info))
1483 vec_stmt_info = (STMT_VINFO_VEC_STMT
1484 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1485 gcc_assert (vec_stmt_info);
1486 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1487 vec_oprnd = PHI_RESULT (phi);
1488 else
1489 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1490 return vec_oprnd;
1493 default:
1494 gcc_unreachable ();
1499 /* Function vect_get_vec_def_for_operand.
1501 OP is an operand in STMT_VINFO. This function returns a (vector) def
1502 that will be used in the vectorized stmt for STMT_VINFO.
1504 In the case that OP is an SSA_NAME which is defined in the loop, then
1505 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1507 In case OP is an invariant or constant, a new stmt that creates a vector def
1508 needs to be introduced. VECTYPE may be used to specify a required type for
1509 vector invariant. */
1511 tree
1512 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1514 gimple *def_stmt;
1515 enum vect_def_type dt;
1516 bool is_simple_use;
1517 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location,
1521 "vect_get_vec_def_for_operand: %T\n", op);
1523 stmt_vec_info def_stmt_info;
1524 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1525 &def_stmt_info, &def_stmt);
1526 gcc_assert (is_simple_use);
1527 if (def_stmt && dump_enabled_p ())
1528 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1530 if (dt == vect_constant_def || dt == vect_external_def)
1532 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1533 tree vector_type;
1535 if (vectype)
1536 vector_type = vectype;
1537 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1538 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1539 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1540 else
1541 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1543 gcc_assert (vector_type);
1544 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1546 else
1547 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
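/* For example, for a scalar statement 'x_3 = y_4 + 7' (illustrative SSA
   names): the constant 7 leads to a new invariant vector built by
   vect_init_vector, whereas y_4, when it is defined inside the loop,
   yields the lhs of the vector statement recorded in STMT_VINFO_VEC_STMT
   of its defining statement.  */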
1551 /* Function vect_get_vec_def_for_stmt_copy
1553 Return a vector-def for an operand. This function is used when the
1554 vectorized stmt to be created (by the caller to this function) is a "copy"
1555 created in case the vectorized result cannot fit in one vector, and several
1556 copies of the vector-stmt are required. In this case the vector-def is
1557 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1558 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1560 Context:
1561 In case the vectorization factor (VF) is bigger than the number
1562 of elements that can fit in a vectype (nunits), we have to generate
1563 more than one vector stmt to vectorize the scalar stmt. This situation
1564 arises when there are multiple data-types operated upon in the loop; the
1565 smallest data-type determines the VF, and as a result, when vectorizing
1566 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1567 vector stmt (each computing a vector of 'nunits' results, and together
1568 computing 'VF' results in each iteration). This function is called when
1569 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1570 which VF=16 and nunits=4, so the number of copies required is 4):
1572 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1574 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1575 VS1.1: vx.1 = memref1 VS1.2
1576 VS1.2: vx.2 = memref2 VS1.3
1577 VS1.3: vx.3 = memref3
1579 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1580 VSnew.1: vz1 = vx.1 + ... VSnew.2
1581 VSnew.2: vz2 = vx.2 + ... VSnew.3
1582 VSnew.3: vz3 = vx.3 + ...
1584 The vectorization of S1 is explained in vectorizable_load.
1585 The vectorization of S2:
1586 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1587 the function 'vect_get_vec_def_for_operand' is called to
1588 get the relevant vector-def for each operand of S2. For operand x it
1589 returns the vector-def 'vx.0'.
1591 To create the remaining copies of the vector-stmt (VSnew.j), this
1592 function is called to get the relevant vector-def for each operand. It is
1593 obtained from the respective VS1.j stmt, which is recorded in the
1594 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1596 For example, to obtain the vector-def 'vx.1' in order to create the
1597 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1598 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1599 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1600 and return its def ('vx.1').
1601 Overall, to create the above sequence this function will be called 3 times:
1602 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1603 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1604 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1606 tree
1607 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1609 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1610 if (!def_stmt_info)
1611 /* Do nothing; can reuse same def. */
1612 return vec_oprnd;
1614 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1615 gcc_assert (def_stmt_info);
1616 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1617 vec_oprnd = PHI_RESULT (phi);
1618 else
1619 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1620 return vec_oprnd;
1624 /* Get vectorized definitions for the operands to create a copy of an original
1625 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1627 void
1628 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1629 vec<tree> *vec_oprnds0,
1630 vec<tree> *vec_oprnds1)
1632 tree vec_oprnd = vec_oprnds0->pop ();
1634 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1635 vec_oprnds0->quick_push (vec_oprnd);
1637 if (vec_oprnds1 && vec_oprnds1->length ())
1639 vec_oprnd = vec_oprnds1->pop ();
1640 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1641 vec_oprnds1->quick_push (vec_oprnd);
1646 /* Get vectorized definitions for OP0 and OP1. */
1648 void
1649 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1650 vec<tree> *vec_oprnds0,
1651 vec<tree> *vec_oprnds1,
1652 slp_tree slp_node)
1654 if (slp_node)
1656 int nops = (op1 == NULL_TREE) ? 1 : 2;
1657 auto_vec<tree> ops (nops);
1658 auto_vec<vec<tree> > vec_defs (nops);
1660 ops.quick_push (op0);
1661 if (op1)
1662 ops.quick_push (op1);
1664 vect_get_slp_defs (ops, slp_node, &vec_defs);
1666 *vec_oprnds0 = vec_defs[0];
1667 if (op1)
1668 *vec_oprnds1 = vec_defs[1];
1670 else
1672 tree vec_oprnd;
1674 vec_oprnds0->create (1);
1675 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1676 vec_oprnds0->quick_push (vec_oprnd);
1678 if (op1)
1680 vec_oprnds1->create (1);
1681 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1682 vec_oprnds1->quick_push (vec_oprnd);
1687 /* Helper function called by vect_finish_replace_stmt and
1688 vect_finish_stmt_generation. Set the location of the new
1689 statement and create and return a stmt_vec_info for it. */
1691 static stmt_vec_info
1692 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1694 vec_info *vinfo = stmt_info->vinfo;
1696 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1698 if (dump_enabled_p ())
1699 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1701 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1703 /* While EH edges will generally prevent vectorization, stmt might
1704 e.g. be in a must-not-throw region. Ensure newly created stmts
1705 that could throw are part of the same region. */
1706 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1707 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1708 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1710 return vec_stmt_info;
1713 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1714 which sets the same scalar result as STMT_INFO did. Create and return a
1715 stmt_vec_info for VEC_STMT. */
1717 stmt_vec_info
1718 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1720 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1722 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1723 gsi_replace (&gsi, vec_stmt, false);
1725 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1728 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1729 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1731 stmt_vec_info
1732 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1733 gimple_stmt_iterator *gsi)
1735 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1737 if (!gsi_end_p (*gsi)
1738 && gimple_has_mem_ops (vec_stmt))
1740 gimple *at_stmt = gsi_stmt (*gsi);
1741 tree vuse = gimple_vuse (at_stmt);
1742 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1744 tree vdef = gimple_vdef (at_stmt);
1745 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1746 /* If we have an SSA vuse and insert a store, update virtual
1747 SSA form to avoid triggering the renamer. Do so only
1748 if we can easily see all uses - which is what almost always
1749 happens with the way vectorized stmts are inserted. */
1750 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1751 && ((is_gimple_assign (vec_stmt)
1752 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1753 || (is_gimple_call (vec_stmt)
1754 && !(gimple_call_flags (vec_stmt)
1755 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1757 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1758 gimple_set_vdef (vec_stmt, new_vdef);
1759 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1763 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1764 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1767 /* We want to vectorize a call to combined function CFN with function
1768 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1769 as the types of all inputs. Check whether this is possible using
1770 an internal function, returning its code if so or IFN_LAST if not. */
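/* For example (the types are made up): a sqrt call maps to CFN_SQRT,
and with VECTYPE_OUT == VECTYPE_IN == vector(2) double this returns
IFN_SQRT when direct_internal_fn_supported_p reports that the target
implements the corresponding vector optab, and IFN_LAST otherwise.  */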
1772 static internal_fn
1773 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1774 tree vectype_out, tree vectype_in)
1776 internal_fn ifn;
1777 if (internal_fn_p (cfn))
1778 ifn = as_internal_fn (cfn);
1779 else
1780 ifn = associated_internal_fn (fndecl);
1781 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1783 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1784 if (info.vectorizable)
1786 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1787 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1788 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1789 OPTIMIZE_FOR_SPEED))
1790 return ifn;
1793 return IFN_LAST;
1797 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1798 gimple_stmt_iterator *);
1800 /* Check whether a load or store statement in the loop described by
1801 LOOP_VINFO is possible in a fully-masked loop. This is testing
1802 whether the vectorizer pass has the appropriate support, as well as
1803 whether the target does.
1805 VLS_TYPE says whether the statement is a load or store and VECTYPE
1806 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1807 says how the load or store is going to be implemented and GROUP_SIZE
1808 is the number of load or store statements in the containing group.
1809 If the access is a gather load or scatter store, GS_INFO describes
1810 its arguments.
1812 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1813 supported, otherwise record the required mask types. */
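/* A minimal example, assuming a simple contiguous access: with
GROUP_SIZE == 2, a vectorization factor of 8 and an 8-element VECTYPE,
the final branch below records 2 * 8 / 8 == 2 masks of that vector
type (one per vector statement per iteration); every unsupported case
instead clears LOOP_VINFO_CAN_FULLY_MASK_P.  */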
1815 static void
1816 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1817 vec_load_store_type vls_type, int group_size,
1818 vect_memory_access_type memory_access_type,
1819 gather_scatter_info *gs_info)
1821 /* Invariant loads need no special support. */
1822 if (memory_access_type == VMAT_INVARIANT)
1823 return;
1825 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1826 machine_mode vecmode = TYPE_MODE (vectype);
1827 bool is_load = (vls_type == VLS_LOAD);
1828 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1830 if (is_load
1831 ? !vect_load_lanes_supported (vectype, group_size, true)
1832 : !vect_store_lanes_supported (vectype, group_size, true))
1834 if (dump_enabled_p ())
1835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1836 "can't use a fully-masked loop because the"
1837 " target doesn't have an appropriate masked"
1838 " load/store-lanes instruction.\n");
1839 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1840 return;
1842 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1843 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1844 return;
1847 if (memory_access_type == VMAT_GATHER_SCATTER)
1849 internal_fn ifn = (is_load
1850 ? IFN_MASK_GATHER_LOAD
1851 : IFN_MASK_SCATTER_STORE);
1852 tree offset_type = TREE_TYPE (gs_info->offset);
1853 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1854 gs_info->memory_type,
1855 TYPE_SIGN (offset_type),
1856 gs_info->scale))
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1860 "can't use a fully-masked loop because the"
1861 " target doesn't have an appropriate masked"
1862 " gather load or scatter store instruction.\n");
1863 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1864 return;
1866 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1867 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1868 return;
1871 if (memory_access_type != VMAT_CONTIGUOUS
1872 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1874 /* Element X of the data must come from iteration i * VF + X of the
1875 scalar loop. We need more work to support other mappings. */
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1878 "can't use a fully-masked loop because an access"
1879 " isn't contiguous.\n");
1880 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1881 return;
1884 machine_mode mask_mode;
1885 if (!(targetm.vectorize.get_mask_mode
1886 (GET_MODE_NUNITS (vecmode),
1887 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1888 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1890 if (dump_enabled_p ())
1891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1892 "can't use a fully-masked loop because the target"
1893 " doesn't have the appropriate masked load or"
1894 " store.\n");
1895 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1896 return;
1898 /* We might load more scalars than we need for permuting SLP loads.
1899 We checked in get_group_load_store_type that the extra elements
1900 don't leak into a new vector. */
1901 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1902 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1903 unsigned int nvectors;
1904 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1905 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1906 else
1907 gcc_unreachable ();
1910 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1911 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1912 that needs to be applied to all loads and stores in a vectorized loop.
1913 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1915 MASK_TYPE is the type of both masks. If new statements are needed,
1916 insert them before GSI. */
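/* A worked example with 4-lane boolean masks (values are made up):

    VEC_MASK  = { 1, 1, 0, 1 }   scalar condition, vectorized
    LOOP_MASK = { 1, 1, 1, 0 }   lanes active in this iteration
    result    = { 1, 1, 0, 0 }   vec_mask_and_N = VEC_MASK & LOOP_MASK

Only lanes that are both selected by the condition and still active
in the fully-masked loop survive.  */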
1918 static tree
1919 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1920 gimple_stmt_iterator *gsi)
1922 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1923 if (!loop_mask)
1924 return vec_mask;
1926 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1927 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1928 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1929 vec_mask, loop_mask);
1930 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1931 return and_res;
1934 /* Determine whether we can use a gather load or scatter store to vectorize
1935 strided load or store STMT_INFO by truncating the current offset to a
1936 smaller width. We need to be able to construct an offset vector:
1938 { 0, X, X*2, X*3, ... }
1940 without loss of precision, where X is STMT_INFO's DR_STEP.
1942 Return true if this is possible, describing the gather load or scatter
1943 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
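/* A hypothetical example: with 32-bit elements and DR_STEP == 4,
trying SCALE == 4 gives X == 1 and an offset vector { 0, 1, 2, 3, ... }.
If the loop is known to run at most 1000 iterations, the largest
offset needed fits comfortably in the 32 bits an element provides and
the truncation is safe; if the product of the iteration bound and X
would need more bits than that, the candidate scale is rejected.  */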
1945 static bool
1946 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1947 loop_vec_info loop_vinfo, bool masked_p,
1948 gather_scatter_info *gs_info)
1950 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1951 data_reference *dr = dr_info->dr;
1952 tree step = DR_STEP (dr);
1953 if (TREE_CODE (step) != INTEGER_CST)
1955 /* ??? Perhaps we could use range information here? */
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE, vect_location,
1958 "cannot truncate variable step.\n");
1959 return false;
1962 /* Get the number of bits in an element. */
1963 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1964 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1965 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1967 /* Set COUNT to the upper limit on the number of elements - 1.
1968 Start with the maximum vectorization factor. */
1969 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1971 /* Try lowering COUNT to the number of scalar latch iterations. */
1972 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1973 widest_int max_iters;
1974 if (max_loop_iterations (loop, &max_iters)
1975 && max_iters < count)
1976 count = max_iters.to_shwi ();
1978 /* Try scales of 1 and the element size. */
1979 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1980 wi::overflow_type overflow = wi::OVF_NONE;
1981 for (int i = 0; i < 2; ++i)
1983 int scale = scales[i];
1984 widest_int factor;
1985 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1986 continue;
1988 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
1989 in OFFSET_BITS bits. */
1990 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1991 if (overflow)
1992 continue;
1993 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1994 if (wi::min_precision (range, sign) > element_bits)
1996 overflow = wi::OVF_UNKNOWN;
1997 continue;
2000 /* See whether the target supports the operation. */
2001 tree memory_type = TREE_TYPE (DR_REF (dr));
2002 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2003 memory_type, element_bits, sign, scale,
2004 &gs_info->ifn, &gs_info->element_type))
2005 continue;
2007 tree offset_type = build_nonstandard_integer_type (element_bits,
2008 sign == UNSIGNED);
2010 gs_info->decl = NULL_TREE;
2011 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2012 but we don't need to store that here. */
2013 gs_info->base = NULL_TREE;
2014 gs_info->offset = fold_convert (offset_type, step);
2015 gs_info->offset_dt = vect_constant_def;
2016 gs_info->offset_vectype = NULL_TREE;
2017 gs_info->scale = scale;
2018 gs_info->memory_type = memory_type;
2019 return true;
2022 if (overflow && dump_enabled_p ())
2023 dump_printf_loc (MSG_NOTE, vect_location,
2024 "truncating gather/scatter offset to %d bits"
2025 " might change its value.\n", element_bits);
2027 return false;
2030 /* Return true if we can use gather/scatter internal functions to
2031 vectorize STMT_INFO, which is a grouped or strided load or store.
2032 MASKED_P is true if load or store is conditional. When returning
2033 true, fill in GS_INFO with the information required to perform the
2034 operation. */
2036 static bool
2037 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2038 loop_vec_info loop_vinfo, bool masked_p,
2039 gather_scatter_info *gs_info)
2041 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2042 || gs_info->decl)
2043 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2044 masked_p, gs_info);
2046 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2047 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2048 tree offset_type = TREE_TYPE (gs_info->offset);
2049 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2051 /* Enforced by vect_check_gather_scatter. */
2052 gcc_assert (element_bits >= offset_bits);
2054 /* If the elements are wider than the offset, convert the offset to the
2055 same width, without changing its sign. */
2056 if (element_bits > offset_bits)
2058 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2059 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2060 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2063 if (dump_enabled_p ())
2064 dump_printf_loc (MSG_NOTE, vect_location,
2065 "using gather/scatter for strided/grouped access,"
2066 " scale = %d\n", gs_info->scale);
2068 return true;
2071 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2072 elements with a known constant step. Return -1 if that step
2073 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2075 static int
2076 compare_step_with_zero (stmt_vec_info stmt_info)
2078 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2079 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2080 size_zero_node);
2083 /* If the target supports a permute mask that reverses the elements in
2084 a vector of type VECTYPE, return that mask, otherwise return null. */
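/* For instance, for a hypothetical 4-element vector the selector is
{ 3, 2, 1, 0 }.  Only the first three indices are pushed below; the
single stepped pattern lets vec_perm_indices extend the sequence to
any number of elements, including variable-length vectors.  */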
2086 static tree
2087 perm_mask_for_reverse (tree vectype)
2089 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2091 /* The encoding has a single stepped pattern. */
2092 vec_perm_builder sel (nunits, 1, 3);
2093 for (int i = 0; i < 3; ++i)
2094 sel.quick_push (nunits - 1 - i);
2096 vec_perm_indices indices (sel, 1, nunits);
2097 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2098 return NULL_TREE;
2099 return vect_gen_perm_mask_checked (vectype, indices);
2102 /* STMT_INFO is either a masked or unconditional store. Return the value
2103 being stored. */
2105 tree
2106 vect_get_store_rhs (stmt_vec_info stmt_info)
2108 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2110 gcc_assert (gimple_assign_single_p (assign));
2111 return gimple_assign_rhs1 (assign);
2113 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2115 internal_fn ifn = gimple_call_internal_fn (call);
2116 int index = internal_fn_stored_value_index (ifn);
2117 gcc_assert (index >= 0);
2118 return gimple_call_arg (call, index);
2120 gcc_unreachable ();
2123 /* A subroutine of get_load_store_type, with a subset of the same
2124 arguments. Handle the case where STMT_INFO is part of a grouped load
2125 or store.
2127 For stores, the statements in the group are all consecutive
2128 and there is no gap at the end. For loads, the statements in the
2129 group might not be consecutive; there can be gaps between statements
2130 as well as at the end. */
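/* A small made-up example of the load case: a loop that reads
a[4*i], a[4*i + 1] and a[4*i + 2] forms a group of size 4 with a gap
of one element at the end, so a straightforward vector load would
touch elements the scalar loop never accesses; the logic below decides
whether such an overrun is acceptable.  */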
2132 static bool
2133 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2134 bool masked_p, vec_load_store_type vls_type,
2135 vect_memory_access_type *memory_access_type,
2136 gather_scatter_info *gs_info)
2138 vec_info *vinfo = stmt_info->vinfo;
2139 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2140 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2141 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2142 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2143 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2144 bool single_element_p = (stmt_info == first_stmt_info
2145 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2146 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2147 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2149 /* True if the vectorized statements would access beyond the last
2150 statement in the group. */
2151 bool overrun_p = false;
2153 /* True if we can cope with such overrun by peeling for gaps, so that
2154 there is at least one final scalar iteration after the vector loop. */
2155 bool can_overrun_p = (!masked_p
2156 && vls_type == VLS_LOAD
2157 && loop_vinfo
2158 && !loop->inner);
2160 /* There can only be a gap at the end of the group if the stride is
2161 known at compile time. */
2162 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2164 /* Stores can't yet have gaps. */
2165 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2167 if (slp)
2169 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2171 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2172 separated by the stride, until we have a complete vector.
2173 Fall back to scalar accesses if that isn't possible. */
2174 if (multiple_p (nunits, group_size))
2175 *memory_access_type = VMAT_STRIDED_SLP;
2176 else
2177 *memory_access_type = VMAT_ELEMENTWISE;
2179 else
2181 overrun_p = loop_vinfo && gap != 0;
2182 if (overrun_p && vls_type != VLS_LOAD)
2184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2185 "Grouped store with gaps requires"
2186 " non-consecutive accesses\n");
2187 return false;
2189 /* An overrun is fine if the trailing elements are smaller
2190 than the alignment boundary B. Every vector access will
2191 be a multiple of B and so we are guaranteed to access a
2192 non-gap element in the same B-sized block. */
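/* Concretely (numbers are illustrative): with 4-byte elements, a
known alignment of 16 bytes and GAP == 1, the gap is smaller than
16 / 4 == 4 elements, so every vector access still reads at least one
real group element in the same 16-byte block and cannot step into an
inaccessible page.  */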
2193 if (overrun_p
2194 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2195 / vect_get_scalar_dr_size (first_dr_info)))
2196 overrun_p = false;
2197 if (overrun_p && !can_overrun_p)
2199 if (dump_enabled_p ())
2200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2201 "Peeling for outer loop is not supported\n");
2202 return false;
2204 *memory_access_type = VMAT_CONTIGUOUS;
2207 else
2209 /* We can always handle this case using elementwise accesses,
2210 but see if something more efficient is available. */
2211 *memory_access_type = VMAT_ELEMENTWISE;
2213 /* If there is a gap at the end of the group then these optimizations
2214 would access excess elements in the last iteration. */
2215 bool would_overrun_p = (gap != 0);
2216 /* An overrun is fine if the trailing elements are smaller than the
2217 alignment boundary B. Every vector access will be a multiple of B
2218 and so we are guaranteed to access a non-gap element in the
2219 same B-sized block. */
2220 if (would_overrun_p
2221 && !masked_p
2222 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2223 / vect_get_scalar_dr_size (first_dr_info)))
2224 would_overrun_p = false;
2226 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2227 && (can_overrun_p || !would_overrun_p)
2228 && compare_step_with_zero (stmt_info) > 0)
2230 /* First cope with the degenerate case of a single-element
2231 vector. */
2232 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2233 *memory_access_type = VMAT_CONTIGUOUS;
2235 /* Otherwise try using LOAD/STORE_LANES. */
2236 if (*memory_access_type == VMAT_ELEMENTWISE
2237 && (vls_type == VLS_LOAD
2238 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2239 : vect_store_lanes_supported (vectype, group_size,
2240 masked_p)))
2242 *memory_access_type = VMAT_LOAD_STORE_LANES;
2243 overrun_p = would_overrun_p;
2246 /* If that fails, try using permuting loads. */
2247 if (*memory_access_type == VMAT_ELEMENTWISE
2248 && (vls_type == VLS_LOAD
2249 ? vect_grouped_load_supported (vectype, single_element_p,
2250 group_size)
2251 : vect_grouped_store_supported (vectype, group_size)))
2253 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2254 overrun_p = would_overrun_p;
2258 /* As a last resort, try using a gather load or scatter store.
2260 ??? Although the code can handle all group sizes correctly,
2261 it probably isn't a win to use separate strided accesses based
2262 on nearby locations. Or, even if it's a win over scalar code,
2263 it might not be a win over vectorizing at a lower VF, if that
2264 allows us to use contiguous accesses. */
2265 if (*memory_access_type == VMAT_ELEMENTWISE
2266 && single_element_p
2267 && loop_vinfo
2268 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2269 masked_p, gs_info))
2270 *memory_access_type = VMAT_GATHER_SCATTER;
2273 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2275 /* STMT is the leader of the group. Check the operands of all the
2276 stmts of the group. */
2277 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2278 while (next_stmt_info)
2280 tree op = vect_get_store_rhs (next_stmt_info);
2281 enum vect_def_type dt;
2282 if (!vect_is_simple_use (op, vinfo, &dt))
2284 if (dump_enabled_p ())
2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286 "use not simple.\n");
2287 return false;
2289 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2293 if (overrun_p)
2295 gcc_assert (can_overrun_p);
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2298 "Data access with gaps requires scalar "
2299 "epilogue loop\n");
2300 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2303 return true;
2306 /* A subroutine of get_load_store_type, with a subset of the same
2307 arguments. Handle the case where STMT_INFO is a load or store that
2308 accesses consecutive elements with a negative step. */
2310 static vect_memory_access_type
2311 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2312 vec_load_store_type vls_type,
2313 unsigned int ncopies)
2315 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2316 dr_alignment_support alignment_support_scheme;
2318 if (ncopies > 1)
2320 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2322 "multiple types with negative step.\n");
2323 return VMAT_ELEMENTWISE;
2326 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2327 if (alignment_support_scheme != dr_aligned
2328 && alignment_support_scheme != dr_unaligned_supported)
2330 if (dump_enabled_p ())
2331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2332 "negative step but alignment required.\n");
2333 return VMAT_ELEMENTWISE;
2336 if (vls_type == VLS_STORE_INVARIANT)
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_NOTE, vect_location,
2340 "negative step with invariant source;"
2341 " no permute needed.\n");
2342 return VMAT_CONTIGUOUS_DOWN;
2345 if (!perm_mask_for_reverse (vectype))
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2349 "negative step and reversing not supported.\n");
2350 return VMAT_ELEMENTWISE;
2353 return VMAT_CONTIGUOUS_REVERSE;
2356 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2357 if there is a memory access type that the vectorized form can use,
2358 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2359 or scatters, fill in GS_INFO accordingly.
2361 SLP says whether we're performing SLP rather than loop vectorization.
2362 MASKED_P is true if the statement is conditional on a vectorized mask.
2363 VECTYPE is the vector type that the vectorized statements will use.
2364 NCOPIES is the number of vector statements that will be needed. */
2366 static bool
2367 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2368 bool masked_p, vec_load_store_type vls_type,
2369 unsigned int ncopies,
2370 vect_memory_access_type *memory_access_type,
2371 gather_scatter_info *gs_info)
2373 vec_info *vinfo = stmt_info->vinfo;
2374 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2375 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2376 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2378 *memory_access_type = VMAT_GATHER_SCATTER;
2379 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2380 gcc_unreachable ();
2381 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2382 &gs_info->offset_dt,
2383 &gs_info->offset_vectype))
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "%s index use not simple.\n",
2388 vls_type == VLS_LOAD ? "gather" : "scatter");
2389 return false;
2392 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2394 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2395 vls_type, memory_access_type, gs_info))
2396 return false;
2398 else if (STMT_VINFO_STRIDED_P (stmt_info))
2400 gcc_assert (!slp);
2401 if (loop_vinfo
2402 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2403 masked_p, gs_info))
2404 *memory_access_type = VMAT_GATHER_SCATTER;
2405 else
2406 *memory_access_type = VMAT_ELEMENTWISE;
2408 else
2410 int cmp = compare_step_with_zero (stmt_info);
2411 if (cmp < 0)
2412 *memory_access_type = get_negative_load_store_type
2413 (stmt_info, vectype, vls_type, ncopies);
2414 else if (cmp == 0)
2416 gcc_assert (vls_type == VLS_LOAD);
2417 *memory_access_type = VMAT_INVARIANT;
2419 else
2420 *memory_access_type = VMAT_CONTIGUOUS;
2423 if ((*memory_access_type == VMAT_ELEMENTWISE
2424 || *memory_access_type == VMAT_STRIDED_SLP)
2425 && !nunits.is_constant ())
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "Not using elementwise accesses due to variable "
2430 "vectorization factor.\n");
2431 return false;
2434 /* FIXME: At the moment the cost model seems to underestimate the
2435 cost of using elementwise accesses. This check preserves the
2436 traditional behavior until that can be fixed. */
2437 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2438 if (!first_stmt_info)
2439 first_stmt_info = stmt_info;
2440 if (*memory_access_type == VMAT_ELEMENTWISE
2441 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2442 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2443 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2444 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2446 if (dump_enabled_p ())
2447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2448 "not falling back to elementwise accesses\n");
2449 return false;
2451 return true;
2454 /* Return true if boolean argument MASK is suitable for vectorizing
2455 conditional load or store STMT_INFO. When returning true, store the type
2456 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2457 in *MASK_VECTYPE_OUT. */
2459 static bool
2460 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2461 vect_def_type *mask_dt_out,
2462 tree *mask_vectype_out)
2464 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2466 if (dump_enabled_p ())
2467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2468 "mask argument is not a boolean.\n");
2469 return false;
2472 if (TREE_CODE (mask) != SSA_NAME)
2474 if (dump_enabled_p ())
2475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2476 "mask argument is not an SSA name.\n");
2477 return false;
2480 enum vect_def_type mask_dt;
2481 tree mask_vectype;
2482 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2484 if (dump_enabled_p ())
2485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2486 "mask use not simple.\n");
2487 return false;
2490 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2491 if (!mask_vectype)
2492 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2494 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2496 if (dump_enabled_p ())
2497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2498 "could not find an appropriate vector mask type.\n");
2499 return false;
2502 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2503 TYPE_VECTOR_SUBPARTS (vectype)))
2505 if (dump_enabled_p ())
2506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2507 "vector mask type %T",
2508 " does not match vector data type %T.\n",
2509 mask_vectype, vectype);
2511 return false;
2514 *mask_dt_out = mask_dt;
2515 *mask_vectype_out = mask_vectype;
2516 return true;
2519 /* Return true if stored value RHS is suitable for vectorizing store
2520 statement STMT_INFO. When returning true, store the type of the
2521 definition in *RHS_DT_OUT, the type of the vectorized store value in
2522 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2524 static bool
2525 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2526 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2527 vec_load_store_type *vls_type_out)
2529 /* In the case this is a store from a constant, make sure
2530 native_encode_expr can handle it. */
2531 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2533 if (dump_enabled_p ())
2534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2535 "cannot encode constant as a byte sequence.\n");
2536 return false;
2539 enum vect_def_type rhs_dt;
2540 tree rhs_vectype;
2541 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 "use not simple.\n");
2546 return false;
2549 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2550 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2552 if (dump_enabled_p ())
2553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2554 "incompatible vector types.\n");
2555 return false;
2558 *rhs_dt_out = rhs_dt;
2559 *rhs_vectype_out = rhs_vectype;
2560 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2561 *vls_type_out = VLS_STORE_INVARIANT;
2562 else
2563 *vls_type_out = VLS_STORE;
2564 return true;
2567 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2568 Note that we support masks with floating-point type, in which case the
2569 floats are interpreted as a bitmask. */
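/* For a hypothetical V4SF mask type, the floating-point branch below
builds the real value whose storage bits are all ones (a NaN when read
back as a float) and splats it across the vector, which the builtins
that take such masks treat as "all lanes enabled".  */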
2571 static tree
2572 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2574 if (TREE_CODE (masktype) == INTEGER_TYPE)
2575 return build_int_cst (masktype, -1);
2576 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2578 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2579 mask = build_vector_from_val (masktype, mask);
2580 return vect_init_vector (stmt_info, mask, masktype, NULL);
2582 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2584 REAL_VALUE_TYPE r;
2585 long tmp[6];
2586 for (int j = 0; j < 6; ++j)
2587 tmp[j] = -1;
2588 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2589 tree mask = build_real (TREE_TYPE (masktype), r);
2590 mask = build_vector_from_val (masktype, mask);
2591 return vect_init_vector (stmt_info, mask, masktype, NULL);
2593 gcc_unreachable ();
2596 /* Build an all-zero merge value of type VECTYPE while vectorizing
2597 STMT_INFO as a gather load. */
2599 static tree
2600 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2602 tree merge;
2603 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2604 merge = build_int_cst (TREE_TYPE (vectype), 0);
2605 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2607 REAL_VALUE_TYPE r;
2608 long tmp[6];
2609 for (int j = 0; j < 6; ++j)
2610 tmp[j] = 0;
2611 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2612 merge = build_real (TREE_TYPE (vectype), r);
2614 else
2615 gcc_unreachable ();
2616 merge = build_vector_from_val (vectype, merge);
2617 return vect_init_vector (stmt_info, merge, vectype, NULL);
2620 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2621 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2622 the gather load operation. If the load is conditional, MASK is the
2623 unvectorized condition and MASK_DT is its definition type, otherwise
2624 MASK is null. */
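/* A sketch of the three shapes handled below, with made-up modes:
with V8SI offsets and V8SF data no adjustment is needed (NONE); with
V8SI offsets and V4DF data only half of each offset vector is consumed
per call, so odd copies permute the high offset half into place
(WIDEN); with V4DI offsets and V8SF data each call yields only half a
data vector, so the results of two calls are combined (NARROW) and
NCOPIES is doubled.  */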
2626 static void
2627 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2628 gimple_stmt_iterator *gsi,
2629 stmt_vec_info *vec_stmt,
2630 gather_scatter_info *gs_info,
2631 tree mask)
2633 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2634 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2635 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2636 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2637 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2638 edge pe = loop_preheader_edge (loop);
2639 enum { NARROW, NONE, WIDEN } modifier;
2640 poly_uint64 gather_off_nunits
2641 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2643 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2644 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2645 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2646 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2647 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2648 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2649 tree scaletype = TREE_VALUE (arglist);
2650 gcc_checking_assert (types_compatible_p (srctype, rettype)
2651 && (!mask || types_compatible_p (srctype, masktype)));
2653 tree perm_mask = NULL_TREE;
2654 tree mask_perm_mask = NULL_TREE;
2655 if (known_eq (nunits, gather_off_nunits))
2656 modifier = NONE;
2657 else if (known_eq (nunits * 2, gather_off_nunits))
2659 modifier = WIDEN;
2661 /* Currently widening gathers and scatters are only supported for
2662 fixed-length vectors. */
2663 int count = gather_off_nunits.to_constant ();
2664 vec_perm_builder sel (count, count, 1);
2665 for (int i = 0; i < count; ++i)
2666 sel.quick_push (i | (count / 2));
2668 vec_perm_indices indices (sel, 1, count);
2669 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2670 indices);
2672 else if (known_eq (nunits, gather_off_nunits * 2))
2674 modifier = NARROW;
2676 /* Currently narrowing gathers and scatters are only supported for
2677 fixed-length vectors. */
2678 int count = nunits.to_constant ();
2679 vec_perm_builder sel (count, count, 1);
2680 sel.quick_grow (count);
2681 for (int i = 0; i < count; ++i)
2682 sel[i] = i < count / 2 ? i : i + count / 2;
2683 vec_perm_indices indices (sel, 2, count);
2684 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2686 ncopies *= 2;
2688 if (mask)
2690 for (int i = 0; i < count; ++i)
2691 sel[i] = i | (count / 2);
2692 indices.new_vector (sel, 2, count);
2693 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2696 else
2697 gcc_unreachable ();
2699 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2700 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2702 tree ptr = fold_convert (ptrtype, gs_info->base);
2703 if (!is_gimple_min_invariant (ptr))
2705 gimple_seq seq;
2706 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2707 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2708 gcc_assert (!new_bb);
2711 tree scale = build_int_cst (scaletype, gs_info->scale);
2713 tree vec_oprnd0 = NULL_TREE;
2714 tree vec_mask = NULL_TREE;
2715 tree src_op = NULL_TREE;
2716 tree mask_op = NULL_TREE;
2717 tree prev_res = NULL_TREE;
2718 stmt_vec_info prev_stmt_info = NULL;
2720 if (!mask)
2722 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2723 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2726 for (int j = 0; j < ncopies; ++j)
2728 tree op, var;
2729 if (modifier == WIDEN && (j & 1))
2730 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2731 perm_mask, stmt_info, gsi);
2732 else if (j == 0)
2733 op = vec_oprnd0
2734 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2735 else
2736 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2737 vec_oprnd0);
2739 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2741 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2742 TYPE_VECTOR_SUBPARTS (idxtype)));
2743 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2744 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2745 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2746 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2747 op = var;
2750 if (mask)
2752 if (mask_perm_mask && (j & 1))
2753 mask_op = permute_vec_elements (mask_op, mask_op,
2754 mask_perm_mask, stmt_info, gsi);
2755 else
2757 if (j == 0)
2758 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2759 else
2760 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2761 vec_mask);
2763 mask_op = vec_mask;
2764 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2766 gcc_assert
2767 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2768 TYPE_VECTOR_SUBPARTS (masktype)));
2769 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2770 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2771 gassign *new_stmt
2772 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2773 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2774 mask_op = var;
2777 src_op = mask_op;
2780 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2781 mask_op, scale);
2783 stmt_vec_info new_stmt_info;
2784 if (!useless_type_conversion_p (vectype, rettype))
2786 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2787 TYPE_VECTOR_SUBPARTS (rettype)));
2788 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2789 gimple_call_set_lhs (new_call, op);
2790 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2791 var = make_ssa_name (vec_dest);
2792 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2793 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2794 new_stmt_info
2795 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2797 else
2799 var = make_ssa_name (vec_dest, new_call);
2800 gimple_call_set_lhs (new_call, var);
2801 new_stmt_info
2802 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2805 if (modifier == NARROW)
2807 if ((j & 1) == 0)
2809 prev_res = var;
2810 continue;
2812 var = permute_vec_elements (prev_res, var, perm_mask,
2813 stmt_info, gsi);
2814 new_stmt_info = loop_vinfo->lookup_def (var);
2817 if (prev_stmt_info == NULL)
2818 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2819 else
2820 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2821 prev_stmt_info = new_stmt_info;
2825 /* Prepare the base and offset in GS_INFO for vectorization.
2826 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2827 to the vectorized offset argument for the first copy of STMT_INFO.
2828 STMT_INFO is the statement described by GS_INFO and LOOP is the
2829 containing loop. */
2831 static void
2832 vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
2833 gather_scatter_info *gs_info,
2834 tree *dataref_ptr, tree *vec_offset)
2836 gimple_seq stmts = NULL;
2837 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2838 if (stmts != NULL)
2840 basic_block new_bb;
2841 edge pe = loop_preheader_edge (loop);
2842 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2843 gcc_assert (!new_bb);
2845 tree offset_type = TREE_TYPE (gs_info->offset);
2846 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2847 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2848 offset_vectype);
2851 /* Prepare to implement a grouped or strided load or store using
2852 the gather load or scatter store operation described by GS_INFO.
2853 STMT_INFO is the load or store statement.
2855 Set *DATAREF_BUMP to the amount that should be added to the base
2856 address after each copy of the vectorized statement. Set *VEC_OFFSET
2857 to an invariant offset vector in which element I has the value
2858 I * DR_STEP / SCALE. */
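/* A worked example with made-up numbers: for DR_STEP == 8 bytes,
SCALE == 4 and a 4-lane vector type, X = 8 / 4 = 2, so *VEC_OFFSET
becomes { 0, 2, 4, 6 } and *DATAREF_BUMP becomes 8 * 4 == 32 bytes,
the amount the base address advances between consecutive copies of the
vectorized statement.  */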
2860 static void
2861 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2862 loop_vec_info loop_vinfo,
2863 gather_scatter_info *gs_info,
2864 tree *dataref_bump, tree *vec_offset)
2866 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2867 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2868 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2869 gimple_seq stmts;
2871 tree bump = size_binop (MULT_EXPR,
2872 fold_convert (sizetype, DR_STEP (dr)),
2873 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2874 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2875 if (stmts)
2876 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2878 /* The offset given in GS_INFO can have pointer type, so use the element
2879 type of the vector instead. */
2880 tree offset_type = TREE_TYPE (gs_info->offset);
2881 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2882 offset_type = TREE_TYPE (offset_vectype);
2884 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2885 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2886 ssize_int (gs_info->scale));
2887 step = fold_convert (offset_type, step);
2888 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2890 /* Create {0, X, X*2, X*3, ...}. */
2891 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2892 build_zero_cst (offset_type), step);
2893 if (stmts)
2894 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2897 /* Return the amount that should be added to a vector pointer to move
2898 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2899 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2900 vectorization. */
2902 static tree
2903 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
2904 vect_memory_access_type memory_access_type)
2906 if (memory_access_type == VMAT_INVARIANT)
2907 return size_zero_node;
2909 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2910 tree step = vect_dr_behavior (dr_info)->step;
2911 if (tree_int_cst_sgn (step) == -1)
2912 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2913 return iv_step;
2916 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
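/* The transform reinterprets each vector as bytes, reverses the bytes
within every word using a VEC_PERM_EXPR and converts back.  As an
illustration for a hypothetical __builtin_bswap32 on a vector of four
32-bit elements, the byte selector is

    { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }

encoded with one stepped pattern per byte of the word so that it also
extends to variable-length vectors.  */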
2918 static bool
2919 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2920 stmt_vec_info *vec_stmt, slp_tree slp_node,
2921 tree vectype_in, stmt_vector_for_cost *cost_vec)
2923 tree op, vectype;
2924 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2925 vec_info *vinfo = stmt_info->vinfo;
2926 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2927 unsigned ncopies;
2929 op = gimple_call_arg (stmt, 0);
2930 vectype = STMT_VINFO_VECTYPE (stmt_info);
2931 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2933 /* Multiple types in SLP are handled by creating the appropriate number of
2934 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2935 case of SLP. */
2936 if (slp_node)
2937 ncopies = 1;
2938 else
2939 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2941 gcc_assert (ncopies >= 1);
2943 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2944 if (! char_vectype)
2945 return false;
2947 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2948 unsigned word_bytes;
2949 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2950 return false;
2952 /* The encoding uses one stepped pattern for each byte in the word. */
2953 vec_perm_builder elts (num_bytes, word_bytes, 3);
2954 for (unsigned i = 0; i < 3; ++i)
2955 for (unsigned j = 0; j < word_bytes; ++j)
2956 elts.quick_push ((i + 1) * word_bytes - j - 1);
2958 vec_perm_indices indices (elts, 1, num_bytes);
2959 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2960 return false;
2962 if (! vec_stmt)
2964 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2965 DUMP_VECT_SCOPE ("vectorizable_bswap");
2966 if (! slp_node)
2968 record_stmt_cost (cost_vec,
2969 1, vector_stmt, stmt_info, 0, vect_prologue);
2970 record_stmt_cost (cost_vec,
2971 ncopies, vec_perm, stmt_info, 0, vect_body);
2973 return true;
2976 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2978 /* Transform. */
2979 vec<tree> vec_oprnds = vNULL;
2980 stmt_vec_info new_stmt_info = NULL;
2981 stmt_vec_info prev_stmt_info = NULL;
2982 for (unsigned j = 0; j < ncopies; j++)
2984 /* Handle uses. */
2985 if (j == 0)
2986 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
2987 else
2988 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
2990 /* Arguments are ready. Create the new vector stmt. */
2991 unsigned i;
2992 tree vop;
2993 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2995 gimple *new_stmt;
2996 tree tem = make_ssa_name (char_vectype);
2997 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2998 char_vectype, vop));
2999 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3000 tree tem2 = make_ssa_name (char_vectype);
3001 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3002 tem, tem, bswap_vconst);
3003 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3004 tem = make_ssa_name (vectype);
3005 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3006 vectype, tem2));
3007 new_stmt_info
3008 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3009 if (slp_node)
3010 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3013 if (slp_node)
3014 continue;
3016 if (j == 0)
3017 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3018 else
3019 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3021 prev_stmt_info = new_stmt_info;
3024 vec_oprnds.release ();
3025 return true;
3028 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3029 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3030 in a single step. On success, store the binary pack code in
3031 *CONVERT_CODE. */
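/* For example (the vector types are hypothetical): narrowing a vector
of 64-bit integers to 32-bit integers needs a single VEC_PACK_TRUNC
step, so this returns true with that pack code; narrowing 64-bit to
16-bit integers would need more than one step and fails.  */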
3033 static bool
3034 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3035 tree_code *convert_code)
3037 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3038 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3039 return false;
3041 tree_code code;
3042 int multi_step_cvt = 0;
3043 auto_vec <tree, 8> interm_types;
3044 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3045 &code, &multi_step_cvt,
3046 &interm_types)
3047 || multi_step_cvt)
3048 return false;
3050 *convert_code = code;
3051 return true;
3054 /* Function vectorizable_call.
3056 Check if STMT_INFO performs a function call that can be vectorized.
3057 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3058 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3059 Return true if STMT_INFO is vectorizable in this way. */
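/* As a small end-to-end sketch (the source loop is hypothetical):

    for (i = 0; i < n; i++)
      b[i] = sqrtf (a[i]);

is recognised as CFN_SQRT and, when the target supports it, each group
of lanes becomes a single vector statement such as

    vect__3 = .SQRT (vect__2);

Masked internal-function calls get their mask operand combined with
the loop mask when the loop is fully masked.  */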
3061 static bool
3062 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3063 stmt_vec_info *vec_stmt, slp_tree slp_node,
3064 stmt_vector_for_cost *cost_vec)
3066 gcall *stmt;
3067 tree vec_dest;
3068 tree scalar_dest;
3069 tree op;
3070 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3071 stmt_vec_info prev_stmt_info;
3072 tree vectype_out, vectype_in;
3073 poly_uint64 nunits_in;
3074 poly_uint64 nunits_out;
3075 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3076 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3077 vec_info *vinfo = stmt_info->vinfo;
3078 tree fndecl, new_temp, rhs_type;
3079 enum vect_def_type dt[4]
3080 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3081 vect_unknown_def_type };
3082 int ndts = ARRAY_SIZE (dt);
3083 int ncopies, j;
3084 auto_vec<tree, 8> vargs;
3085 auto_vec<tree, 8> orig_vargs;
3086 enum { NARROW, NONE, WIDEN } modifier;
3087 size_t i, nargs;
3088 tree lhs;
3090 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3091 return false;
3093 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3094 && ! vec_stmt)
3095 return false;
3097 /* Is STMT_INFO a vectorizable call? */
3098 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3099 if (!stmt)
3100 return false;
3102 if (gimple_call_internal_p (stmt)
3103 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3104 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3105 /* Handled by vectorizable_load and vectorizable_store. */
3106 return false;
3108 if (gimple_call_lhs (stmt) == NULL_TREE
3109 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3110 return false;
3112 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3114 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3116 /* Process function arguments. */
3117 rhs_type = NULL_TREE;
3118 vectype_in = NULL_TREE;
3119 nargs = gimple_call_num_args (stmt);
3121 /* Bail out if the function has more than four arguments, we do not have
3122 interesting builtin functions to vectorize with more than two arguments
3123 except for fma. No arguments is also not good. */
3124 if (nargs == 0 || nargs > 4)
3125 return false;
3127 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3128 combined_fn cfn = gimple_call_combined_fn (stmt);
3129 if (cfn == CFN_GOMP_SIMD_LANE)
3131 nargs = 0;
3132 rhs_type = unsigned_type_node;
3135 int mask_opno = -1;
3136 if (internal_fn_p (cfn))
3137 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3139 for (i = 0; i < nargs; i++)
3141 tree opvectype;
3143 op = gimple_call_arg (stmt, i);
3144 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
3146 if (dump_enabled_p ())
3147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3148 "use not simple.\n");
3149 return false;
3152 /* Skip the mask argument to an internal function. This operand
3153 has been converted via a pattern if necessary. */
3154 if ((int) i == mask_opno)
3155 continue;
3157 /* We can only handle calls with arguments of the same type. */
3158 if (rhs_type
3159 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3161 if (dump_enabled_p ())
3162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3163 "argument types differ.\n");
3164 return false;
3166 if (!rhs_type)
3167 rhs_type = TREE_TYPE (op);
3169 if (!vectype_in)
3170 vectype_in = opvectype;
3171 else if (opvectype
3172 && opvectype != vectype_in)
3174 if (dump_enabled_p ())
3175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3176 "argument vector types differ.\n");
3177 return false;
3180 /* If all arguments are external or constant defs use a vector type with
3181 the same size as the output vector type. */
3182 if (!vectype_in)
3183 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3184 if (vec_stmt)
3185 gcc_assert (vectype_in);
3186 if (!vectype_in)
3188 if (dump_enabled_p ())
3189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3190 "no vectype for scalar type %T\n", rhs_type);
3192 return false;
3195 /* FORNOW */
3196 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3197 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3198 if (known_eq (nunits_in * 2, nunits_out))
3199 modifier = NARROW;
3200 else if (known_eq (nunits_out, nunits_in))
3201 modifier = NONE;
3202 else if (known_eq (nunits_out * 2, nunits_in))
3203 modifier = WIDEN;
3204 else
3205 return false;
3207 /* We only handle functions that do not read or clobber memory. */
3208 if (gimple_vuse (stmt))
3210 if (dump_enabled_p ())
3211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3212 "function reads from or writes to memory.\n");
3213 return false;
3216 /* For now, we only vectorize functions if a target specific builtin
3217 is available. TODO -- in some cases, it might be profitable to
3218 insert the calls for pieces of the vector, in order to be able
3219 to vectorize other operations in the loop. */
3220 fndecl = NULL_TREE;
3221 internal_fn ifn = IFN_LAST;
3222 tree callee = gimple_call_fndecl (stmt);
3224 /* First try using an internal function. */
3225 tree_code convert_code = ERROR_MARK;
3226 if (cfn != CFN_LAST
3227 && (modifier == NONE
3228 || (modifier == NARROW
3229 && simple_integer_narrowing (vectype_out, vectype_in,
3230 &convert_code))))
3231 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3232 vectype_in);
3234 /* If that fails, try asking for a target-specific built-in function. */
3235 if (ifn == IFN_LAST)
3237 if (cfn != CFN_LAST)
3238 fndecl = targetm.vectorize.builtin_vectorized_function
3239 (cfn, vectype_out, vectype_in);
3240 else if (callee)
3241 fndecl = targetm.vectorize.builtin_md_vectorized_function
3242 (callee, vectype_out, vectype_in);
3245 if (ifn == IFN_LAST && !fndecl)
3247 if (cfn == CFN_GOMP_SIMD_LANE
3248 && !slp_node
3249 && loop_vinfo
3250 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3251 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3252 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3253 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3255 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3256 { 0, 1, 2, ... vf - 1 } vector. */
3257 gcc_assert (nargs == 0);
3259 else if (modifier == NONE
3260 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3261 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3262 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3263 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3264 vectype_in, cost_vec);
3265 else
3267 if (dump_enabled_p ())
3268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3269 "function is not vectorizable.\n");
3270 return false;
3274 if (slp_node)
3275 ncopies = 1;
3276 else if (modifier == NARROW && ifn == IFN_LAST)
3277 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3278 else
3279 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3281 /* Sanity check: make sure that at least one copy of the vectorized stmt
3282 needs to be generated. */
3283 gcc_assert (ncopies >= 1);
3285 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3286 if (!vec_stmt) /* transformation not required. */
3288 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3289 DUMP_VECT_SCOPE ("vectorizable_call");
3290 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3291 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3292 record_stmt_cost (cost_vec, ncopies / 2,
3293 vec_promote_demote, stmt_info, 0, vect_body);
3295 if (loop_vinfo && mask_opno >= 0)
3297 unsigned int nvectors = (slp_node
3298 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3299 : ncopies);
3300 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3302 return true;
3305 /* Transform. */
3307 if (dump_enabled_p ())
3308 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3310 /* Handle def. */
3311 scalar_dest = gimple_call_lhs (stmt);
3312 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3314 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3316 stmt_vec_info new_stmt_info = NULL;
3317 prev_stmt_info = NULL;
3318 if (modifier == NONE || ifn != IFN_LAST)
3320 tree prev_res = NULL_TREE;
3321 vargs.safe_grow (nargs);
3322 orig_vargs.safe_grow (nargs);
3323 for (j = 0; j < ncopies; ++j)
3325 /* Build argument list for the vectorized call. */
3326 if (slp_node)
3328 auto_vec<vec<tree> > vec_defs (nargs);
3329 vec<tree> vec_oprnds0;
3331 for (i = 0; i < nargs; i++)
3332 vargs[i] = gimple_call_arg (stmt, i);
3333 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3334 vec_oprnds0 = vec_defs[0];
3336 /* Arguments are ready. Create the new vector stmt. */
3337 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3339 size_t k;
3340 for (k = 0; k < nargs; k++)
3342 vec<tree> vec_oprndsk = vec_defs[k];
3343 vargs[k] = vec_oprndsk[i];
3345 if (modifier == NARROW)
3347 /* We don't define any narrowing conditional functions
3348 at present. */
3349 gcc_assert (mask_opno < 0);
3350 tree half_res = make_ssa_name (vectype_in);
3351 gcall *call
3352 = gimple_build_call_internal_vec (ifn, vargs);
3353 gimple_call_set_lhs (call, half_res);
3354 gimple_call_set_nothrow (call, true);
3355 new_stmt_info
3356 = vect_finish_stmt_generation (stmt_info, call, gsi);
3357 if ((i & 1) == 0)
3359 prev_res = half_res;
3360 continue;
3362 new_temp = make_ssa_name (vec_dest);
3363 gimple *new_stmt
3364 = gimple_build_assign (new_temp, convert_code,
3365 prev_res, half_res);
3366 new_stmt_info
3367 = vect_finish_stmt_generation (stmt_info, new_stmt,
3368 gsi);
3370 else
3372 if (mask_opno >= 0 && masked_loop_p)
3374 unsigned int vec_num = vec_oprnds0.length ();
3375 /* Always true for SLP. */
3376 gcc_assert (ncopies == 1);
3377 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3378 vectype_out, i);
3379 vargs[mask_opno] = prepare_load_store_mask
3380 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3383 gcall *call;
3384 if (ifn != IFN_LAST)
3385 call = gimple_build_call_internal_vec (ifn, vargs);
3386 else
3387 call = gimple_build_call_vec (fndecl, vargs);
3388 new_temp = make_ssa_name (vec_dest, call);
3389 gimple_call_set_lhs (call, new_temp);
3390 gimple_call_set_nothrow (call, true);
3391 new_stmt_info
3392 = vect_finish_stmt_generation (stmt_info, call, gsi);
3394 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3397 for (i = 0; i < nargs; i++)
3399 vec<tree> vec_oprndsi = vec_defs[i];
3400 vec_oprndsi.release ();
3402 continue;
3405 for (i = 0; i < nargs; i++)
3407 op = gimple_call_arg (stmt, i);
3408 if (j == 0)
3409 vec_oprnd0
3410 = vect_get_vec_def_for_operand (op, stmt_info);
3411 else
3412 vec_oprnd0
3413 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3415 orig_vargs[i] = vargs[i] = vec_oprnd0;
3418 if (mask_opno >= 0 && masked_loop_p)
3420 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3421 vectype_out, j);
3422 vargs[mask_opno]
3423 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3424 vargs[mask_opno], gsi);
3427 if (cfn == CFN_GOMP_SIMD_LANE)
3429 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3430 tree new_var
3431 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3432 gimple *init_stmt = gimple_build_assign (new_var, cst);
3433 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3434 new_temp = make_ssa_name (vec_dest);
3435 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3436 new_stmt_info
3437 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3439 else if (modifier == NARROW)
3441 /* We don't define any narrowing conditional functions at
3442 present. */
3443 gcc_assert (mask_opno < 0);
3444 tree half_res = make_ssa_name (vectype_in);
3445 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3446 gimple_call_set_lhs (call, half_res);
3447 gimple_call_set_nothrow (call, true);
3448 new_stmt_info
3449 = vect_finish_stmt_generation (stmt_info, call, gsi);
3450 if ((j & 1) == 0)
3452 prev_res = half_res;
3453 continue;
3455 new_temp = make_ssa_name (vec_dest);
3456 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3457 prev_res, half_res);
3458 new_stmt_info
3459 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3461 else
3463 gcall *call;
3464 if (ifn != IFN_LAST)
3465 call = gimple_build_call_internal_vec (ifn, vargs);
3466 else
3467 call = gimple_build_call_vec (fndecl, vargs);
3468 new_temp = make_ssa_name (vec_dest, call);
3469 gimple_call_set_lhs (call, new_temp);
3470 gimple_call_set_nothrow (call, true);
3471 new_stmt_info
3472 = vect_finish_stmt_generation (stmt_info, call, gsi);
3475 if (j == (modifier == NARROW ? 1 : 0))
3476 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3477 else
3478 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3480 prev_stmt_info = new_stmt_info;
3483 else if (modifier == NARROW)
3485 /* We don't define any narrowing conditional functions at present. */
3486 gcc_assert (mask_opno < 0);
3487 for (j = 0; j < ncopies; ++j)
3489 /* Build argument list for the vectorized call. */
3490 if (j == 0)
3491 vargs.create (nargs * 2);
3492 else
3493 vargs.truncate (0);
3495 if (slp_node)
3497 auto_vec<vec<tree> > vec_defs (nargs);
3498 vec<tree> vec_oprnds0;
3500 for (i = 0; i < nargs; i++)
3501 vargs.quick_push (gimple_call_arg (stmt, i));
3502 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3503 vec_oprnds0 = vec_defs[0];
3505 /* Arguments are ready. Create the new vector stmt. */
3506 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3508 size_t k;
3509 vargs.truncate (0);
3510 for (k = 0; k < nargs; k++)
3512 vec<tree> vec_oprndsk = vec_defs[k];
3513 vargs.quick_push (vec_oprndsk[i]);
3514 vargs.quick_push (vec_oprndsk[i + 1]);
3516 gcall *call;
3517 if (ifn != IFN_LAST)
3518 call = gimple_build_call_internal_vec (ifn, vargs);
3519 else
3520 call = gimple_build_call_vec (fndecl, vargs);
3521 new_temp = make_ssa_name (vec_dest, call);
3522 gimple_call_set_lhs (call, new_temp);
3523 gimple_call_set_nothrow (call, true);
3524 new_stmt_info
3525 = vect_finish_stmt_generation (stmt_info, call, gsi);
3526 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3529 for (i = 0; i < nargs; i++)
3531 vec<tree> vec_oprndsi = vec_defs[i];
3532 vec_oprndsi.release ();
3534 continue;
3537 for (i = 0; i < nargs; i++)
3539 op = gimple_call_arg (stmt, i);
3540 if (j == 0)
3542 vec_oprnd0
3543 = vect_get_vec_def_for_operand (op, stmt_info);
3544 vec_oprnd1
3545 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3547 else
3549 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3550 2 * i + 1);
3551 vec_oprnd0
3552 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3553 vec_oprnd1
3554 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3557 vargs.quick_push (vec_oprnd0);
3558 vargs.quick_push (vec_oprnd1);
3561 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3562 new_temp = make_ssa_name (vec_dest, new_stmt);
3563 gimple_call_set_lhs (new_stmt, new_temp);
3564 new_stmt_info
3565 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3567 if (j == 0)
3568 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3569 else
3570 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3572 prev_stmt_info = new_stmt_info;
3575 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3577 else
3578 /* No current target implements this case. */
3579 return false;
3581 vargs.release ();
3583 /* The call in STMT might prevent it from being removed in dce.
3584 We cannot, however, remove it here, due to the way the ssa name
3585 it defines is mapped to the new definition. So just replace the
3586 rhs of the statement with something harmless. */
3588 if (slp_node)
3589 return true;
3591 stmt_info = vect_orig_stmt (stmt_info);
3592 lhs = gimple_get_lhs (stmt_info->stmt);
3594 gassign *new_stmt
3595 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3596 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3598 return true;
3602 struct simd_call_arg_info
3604 tree vectype;
3605 tree op;
3606 HOST_WIDE_INT linear_step;
3607 enum vect_def_type dt;
3608 unsigned int align;
3609 bool simd_lane_linear;
3612 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3613 is linear within a simd lane (but not within the whole loop), note it
3614 in *ARGINFO. */
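/* A sketch of the kind of definition chain this recognizes (the SSA names
   and the step value below are hypothetical, not taken from a real dump):

       _1 = GOMP_SIMD_LANE (simduid.0);
       _2 = _1 * 8;                      <-- linear_step becomes 8
       _3 = (sizetype) _2;
       op_4 = &array + _3;               <-- POINTER_PLUS_EXPR

   i.e. OP is a base address plus a multiple of the simd lane number, so it
   advances by LINEAR_STEP bytes from one lane to the next.  */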
3616 static void
3617 vect_simd_lane_linear (tree op, struct loop *loop,
3618 struct simd_call_arg_info *arginfo)
3620 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3622 if (!is_gimple_assign (def_stmt)
3623 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3624 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3625 return;
3627 tree base = gimple_assign_rhs1 (def_stmt);
3628 HOST_WIDE_INT linear_step = 0;
3629 tree v = gimple_assign_rhs2 (def_stmt);
3630 while (TREE_CODE (v) == SSA_NAME)
3632 tree t;
3633 def_stmt = SSA_NAME_DEF_STMT (v);
3634 if (is_gimple_assign (def_stmt))
3635 switch (gimple_assign_rhs_code (def_stmt))
3637 case PLUS_EXPR:
3638 t = gimple_assign_rhs2 (def_stmt);
3639 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3640 return;
3641 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3642 v = gimple_assign_rhs1 (def_stmt);
3643 continue;
3644 case MULT_EXPR:
3645 t = gimple_assign_rhs2 (def_stmt);
3646 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3647 return;
3648 linear_step = tree_to_shwi (t);
3649 v = gimple_assign_rhs1 (def_stmt);
3650 continue;
3651 CASE_CONVERT:
3652 t = gimple_assign_rhs1 (def_stmt);
3653 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3654 || (TYPE_PRECISION (TREE_TYPE (v))
3655 < TYPE_PRECISION (TREE_TYPE (t))))
3656 return;
3657 if (!linear_step)
3658 linear_step = 1;
3659 v = t;
3660 continue;
3661 default:
3662 return;
3664 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3665 && loop->simduid
3666 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3667 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3668 == loop->simduid))
3670 if (!linear_step)
3671 linear_step = 1;
3672 arginfo->linear_step = linear_step;
3673 arginfo->op = base;
3674 arginfo->simd_lane_linear = true;
3675 return;
3680 /* Return the number of elements in vector type VECTYPE, which is associated
3681 with a SIMD clone. At present these vectors always have a constant
3682 length. */
3684 static unsigned HOST_WIDE_INT
3685 simd_clone_subparts (tree vectype)
3687 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3690 /* Function vectorizable_simd_clone_call.
3692 Check if STMT_INFO performs a function call that can be vectorized
3693 by calling a simd clone of the function.
3694 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3695 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3696 Return true if STMT_INFO is vectorizable in this way. */
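/* As an illustrative sketch only (the clone name below follows the common
   vector-ABI mangling and is hypothetical), a loop such as

       #pragma omp declare simd
       float foo (float);
       ...
       for (i = 0; i < n; i++)
	 y[i] = foo (x[i]);

   can, with a vectorization factor of 4, have the scalar call replaced by a
   single call to the simd clone chosen from NODE->simd_clones, e.g.

       vect_y = _ZGVbN4v_foo (vect_x);

   where vect_x and vect_y are V4SF vectors.  */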
3698 static bool
3699 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3700 gimple_stmt_iterator *gsi,
3701 stmt_vec_info *vec_stmt, slp_tree slp_node,
3702 stmt_vector_for_cost *)
3704 tree vec_dest;
3705 tree scalar_dest;
3706 tree op, type;
3707 tree vec_oprnd0 = NULL_TREE;
3708 stmt_vec_info prev_stmt_info;
3709 tree vectype;
3710 unsigned int nunits;
3711 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3712 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3713 vec_info *vinfo = stmt_info->vinfo;
3714 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3715 tree fndecl, new_temp;
3716 int ncopies, j;
3717 auto_vec<simd_call_arg_info> arginfo;
3718 vec<tree> vargs = vNULL;
3719 size_t i, nargs;
3720 tree lhs, rtype, ratype;
3721 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3723 /* Is STMT a vectorizable call? */
3724 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3725 if (!stmt)
3726 return false;
3728 fndecl = gimple_call_fndecl (stmt);
3729 if (fndecl == NULL_TREE)
3730 return false;
3732 struct cgraph_node *node = cgraph_node::get (fndecl);
3733 if (node == NULL || node->simd_clones == NULL)
3734 return false;
3736 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3737 return false;
3739 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3740 && ! vec_stmt)
3741 return false;
3743 if (gimple_call_lhs (stmt)
3744 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3745 return false;
3747 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3749 vectype = STMT_VINFO_VECTYPE (stmt_info);
3751 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3752 return false;
3754 /* FORNOW */
3755 if (slp_node)
3756 return false;
3758 /* Process function arguments. */
3759 nargs = gimple_call_num_args (stmt);
3761 /* Bail out if the function has zero arguments. */
3762 if (nargs == 0)
3763 return false;
3765 arginfo.reserve (nargs, true);
3767 for (i = 0; i < nargs; i++)
3769 simd_call_arg_info thisarginfo;
3770 affine_iv iv;
3772 thisarginfo.linear_step = 0;
3773 thisarginfo.align = 0;
3774 thisarginfo.op = NULL_TREE;
3775 thisarginfo.simd_lane_linear = false;
3777 op = gimple_call_arg (stmt, i);
3778 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3779 &thisarginfo.vectype)
3780 || thisarginfo.dt == vect_uninitialized_def)
3782 if (dump_enabled_p ())
3783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3784 "use not simple.\n");
3785 return false;
3788 if (thisarginfo.dt == vect_constant_def
3789 || thisarginfo.dt == vect_external_def)
3790 gcc_assert (thisarginfo.vectype == NULL_TREE);
3791 else
3792 gcc_assert (thisarginfo.vectype != NULL_TREE);
3794 /* For linear arguments, the analysis phase should have saved
3795 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3796 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3797 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3799 gcc_assert (vec_stmt);
3800 thisarginfo.linear_step
3801 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3802 thisarginfo.op
3803 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3804 thisarginfo.simd_lane_linear
3805 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3806 == boolean_true_node);
3807 /* If the loop has been peeled for alignment, we need to adjust it. */
3808 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3809 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3810 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3812 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3813 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3814 tree opt = TREE_TYPE (thisarginfo.op);
3815 bias = fold_convert (TREE_TYPE (step), bias);
3816 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3817 thisarginfo.op
3818 = fold_build2 (POINTER_TYPE_P (opt)
3819 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3820 thisarginfo.op, bias);
3823 else if (!vec_stmt
3824 && thisarginfo.dt != vect_constant_def
3825 && thisarginfo.dt != vect_external_def
3826 && loop_vinfo
3827 && TREE_CODE (op) == SSA_NAME
3828 && simple_iv (loop, loop_containing_stmt (stmt), op,
3829 &iv, false)
3830 && tree_fits_shwi_p (iv.step))
3832 thisarginfo.linear_step = tree_to_shwi (iv.step);
3833 thisarginfo.op = iv.base;
3835 else if ((thisarginfo.dt == vect_constant_def
3836 || thisarginfo.dt == vect_external_def)
3837 && POINTER_TYPE_P (TREE_TYPE (op)))
3838 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3839 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3840 linear too. */
3841 if (POINTER_TYPE_P (TREE_TYPE (op))
3842 && !thisarginfo.linear_step
3843 && !vec_stmt
3844 && thisarginfo.dt != vect_constant_def
3845 && thisarginfo.dt != vect_external_def
3846 && loop_vinfo
3847 && !slp_node
3848 && TREE_CODE (op) == SSA_NAME)
3849 vect_simd_lane_linear (op, loop, &thisarginfo);
3851 arginfo.quick_push (thisarginfo);
3854 unsigned HOST_WIDE_INT vf;
3855 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3857 if (dump_enabled_p ())
3858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3859 "not considering SIMD clones; not yet supported"
3860 " for variable-width vectors.\n");
3861 return false;
3864 unsigned int badness = 0;
3865 struct cgraph_node *bestn = NULL;
3866 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3867 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3868 else
3869 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3870 n = n->simdclone->next_clone)
3872 unsigned int this_badness = 0;
3873 if (n->simdclone->simdlen > vf
3874 || n->simdclone->nargs != nargs)
3875 continue;
3876 if (n->simdclone->simdlen < vf)
3877 this_badness += (exact_log2 (vf)
3878 - exact_log2 (n->simdclone->simdlen)) * 1024;
3879 if (n->simdclone->inbranch)
3880 this_badness += 2048;
3881 int target_badness = targetm.simd_clone.usable (n);
3882 if (target_badness < 0)
3883 continue;
3884 this_badness += target_badness * 512;
3885 /* FORNOW: Have to add code to add the mask argument. */
3886 if (n->simdclone->inbranch)
3887 continue;
3888 for (i = 0; i < nargs; i++)
3890 switch (n->simdclone->args[i].arg_type)
3892 case SIMD_CLONE_ARG_TYPE_VECTOR:
3893 if (!useless_type_conversion_p
3894 (n->simdclone->args[i].orig_type,
3895 TREE_TYPE (gimple_call_arg (stmt, i))))
3896 i = -1;
3897 else if (arginfo[i].dt == vect_constant_def
3898 || arginfo[i].dt == vect_external_def
3899 || arginfo[i].linear_step)
3900 this_badness += 64;
3901 break;
3902 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3903 if (arginfo[i].dt != vect_constant_def
3904 && arginfo[i].dt != vect_external_def)
3905 i = -1;
3906 break;
3907 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3908 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3909 if (arginfo[i].dt == vect_constant_def
3910 || arginfo[i].dt == vect_external_def
3911 || (arginfo[i].linear_step
3912 != n->simdclone->args[i].linear_step))
3913 i = -1;
3914 break;
3915 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3916 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3917 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3918 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3919 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3920 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3921 /* FORNOW */
3922 i = -1;
3923 break;
3924 case SIMD_CLONE_ARG_TYPE_MASK:
3925 gcc_unreachable ();
3927 if (i == (size_t) -1)
3928 break;
3929 if (n->simdclone->args[i].alignment > arginfo[i].align)
3931 i = -1;
3932 break;
3934 if (arginfo[i].align)
3935 this_badness += (exact_log2 (arginfo[i].align)
3936 - exact_log2 (n->simdclone->args[i].alignment));
3938 if (i == (size_t) -1)
3939 continue;
3940 if (bestn == NULL || this_badness < badness)
3942 bestn = n;
3943 badness = this_badness;
3947 if (bestn == NULL)
3948 return false;
3950 for (i = 0; i < nargs; i++)
3951 if ((arginfo[i].dt == vect_constant_def
3952 || arginfo[i].dt == vect_external_def)
3953 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3955 arginfo[i].vectype
3956 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3957 i)));
3958 if (arginfo[i].vectype == NULL
3959 || (simd_clone_subparts (arginfo[i].vectype)
3960 > bestn->simdclone->simdlen))
3961 return false;
3964 fndecl = bestn->decl;
3965 nunits = bestn->simdclone->simdlen;
3966 ncopies = vf / nunits;
3968 /* If the function isn't const, only allow it in simd loops where the
3969 user has asserted that at least nunits consecutive iterations can be
3970 performed using SIMD instructions. */
3971 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3972 && gimple_vuse (stmt))
3973 return false;
3975 /* Sanity check: make sure that at least one copy of the vectorized stmt
3976 needs to be generated. */
3977 gcc_assert (ncopies >= 1);
3979 if (!vec_stmt) /* transformation not required. */
3981 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3982 for (i = 0; i < nargs; i++)
3983 if ((bestn->simdclone->args[i].arg_type
3984 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3985 || (bestn->simdclone->args[i].arg_type
3986 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3988 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3989 + 1);
3990 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3991 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3992 ? size_type_node : TREE_TYPE (arginfo[i].op);
3993 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3994 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3995 tree sll = arginfo[i].simd_lane_linear
3996 ? boolean_true_node : boolean_false_node;
3997 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3999 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4000 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4001 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4002 return true;
4005 /* Transform. */
4007 if (dump_enabled_p ())
4008 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4010 /* Handle def. */
4011 scalar_dest = gimple_call_lhs (stmt);
4012 vec_dest = NULL_TREE;
4013 rtype = NULL_TREE;
4014 ratype = NULL_TREE;
4015 if (scalar_dest)
4017 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4018 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4019 if (TREE_CODE (rtype) == ARRAY_TYPE)
4021 ratype = rtype;
4022 rtype = TREE_TYPE (ratype);
4026 prev_stmt_info = NULL;
4027 for (j = 0; j < ncopies; ++j)
4029 /* Build argument list for the vectorized call. */
4030 if (j == 0)
4031 vargs.create (nargs);
4032 else
4033 vargs.truncate (0);
4035 for (i = 0; i < nargs; i++)
4037 unsigned int k, l, m, o;
4038 tree atype;
4039 op = gimple_call_arg (stmt, i);
4040 switch (bestn->simdclone->args[i].arg_type)
4042 case SIMD_CLONE_ARG_TYPE_VECTOR:
4043 atype = bestn->simdclone->args[i].vector_type;
4044 o = nunits / simd_clone_subparts (atype);
4045 for (m = j * o; m < (j + 1) * o; m++)
4047 if (simd_clone_subparts (atype)
4048 < simd_clone_subparts (arginfo[i].vectype))
4050 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4051 k = (simd_clone_subparts (arginfo[i].vectype)
4052 / simd_clone_subparts (atype));
4053 gcc_assert ((k & (k - 1)) == 0);
4054 if (m == 0)
4055 vec_oprnd0
4056 = vect_get_vec_def_for_operand (op, stmt_info);
4057 else
4059 vec_oprnd0 = arginfo[i].op;
4060 if ((m & (k - 1)) == 0)
4061 vec_oprnd0
4062 = vect_get_vec_def_for_stmt_copy (vinfo,
4063 vec_oprnd0);
4065 arginfo[i].op = vec_oprnd0;
4066 vec_oprnd0
4067 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4068 bitsize_int (prec),
4069 bitsize_int ((m & (k - 1)) * prec));
4070 gassign *new_stmt
4071 = gimple_build_assign (make_ssa_name (atype),
4072 vec_oprnd0);
4073 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4074 vargs.safe_push (gimple_assign_lhs (new_stmt));
4076 else
4078 k = (simd_clone_subparts (atype)
4079 / simd_clone_subparts (arginfo[i].vectype));
4080 gcc_assert ((k & (k - 1)) == 0);
4081 vec<constructor_elt, va_gc> *ctor_elts;
4082 if (k != 1)
4083 vec_alloc (ctor_elts, k);
4084 else
4085 ctor_elts = NULL;
4086 for (l = 0; l < k; l++)
4088 if (m == 0 && l == 0)
4089 vec_oprnd0
4090 = vect_get_vec_def_for_operand (op, stmt_info);
4091 else
4092 vec_oprnd0
4093 = vect_get_vec_def_for_stmt_copy (vinfo,
4094 arginfo[i].op);
4095 arginfo[i].op = vec_oprnd0;
4096 if (k == 1)
4097 break;
4098 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4099 vec_oprnd0);
4101 if (k == 1)
4102 vargs.safe_push (vec_oprnd0);
4103 else
4105 vec_oprnd0 = build_constructor (atype, ctor_elts);
4106 gassign *new_stmt
4107 = gimple_build_assign (make_ssa_name (atype),
4108 vec_oprnd0);
4109 vect_finish_stmt_generation (stmt_info, new_stmt,
4110 gsi);
4111 vargs.safe_push (gimple_assign_lhs (new_stmt));
4115 break;
4116 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4117 vargs.safe_push (op);
4118 break;
4119 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4120 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4121 if (j == 0)
4123 gimple_seq stmts;
4124 arginfo[i].op
4125 = force_gimple_operand (arginfo[i].op, &stmts, true,
4126 NULL_TREE);
4127 if (stmts != NULL)
4129 basic_block new_bb;
4130 edge pe = loop_preheader_edge (loop);
4131 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4132 gcc_assert (!new_bb);
4134 if (arginfo[i].simd_lane_linear)
4136 vargs.safe_push (arginfo[i].op);
4137 break;
4139 tree phi_res = copy_ssa_name (op);
4140 gphi *new_phi = create_phi_node (phi_res, loop->header);
4141 loop_vinfo->add_stmt (new_phi);
4142 add_phi_arg (new_phi, arginfo[i].op,
4143 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4144 enum tree_code code
4145 = POINTER_TYPE_P (TREE_TYPE (op))
4146 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4147 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4148 ? sizetype : TREE_TYPE (op);
4149 widest_int cst
4150 = wi::mul (bestn->simdclone->args[i].linear_step,
4151 ncopies * nunits);
4152 tree tcst = wide_int_to_tree (type, cst);
4153 tree phi_arg = copy_ssa_name (op);
4154 gassign *new_stmt
4155 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4156 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4157 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4158 loop_vinfo->add_stmt (new_stmt);
4159 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4160 UNKNOWN_LOCATION);
4161 arginfo[i].op = phi_res;
4162 vargs.safe_push (phi_res);
4164 else
4166 enum tree_code code
4167 = POINTER_TYPE_P (TREE_TYPE (op))
4168 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4169 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4170 ? sizetype : TREE_TYPE (op);
4171 widest_int cst
4172 = wi::mul (bestn->simdclone->args[i].linear_step,
4173 j * nunits);
4174 tree tcst = wide_int_to_tree (type, cst);
4175 new_temp = make_ssa_name (TREE_TYPE (op));
4176 gassign *new_stmt
4177 = gimple_build_assign (new_temp, code,
4178 arginfo[i].op, tcst);
4179 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4180 vargs.safe_push (new_temp);
4182 break;
4183 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4185 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4186 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4187 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4188 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4189 default:
4190 gcc_unreachable ();
4194 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4195 if (vec_dest)
4197 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4198 if (ratype)
4199 new_temp = create_tmp_var (ratype);
4200 else if (simd_clone_subparts (vectype)
4201 == simd_clone_subparts (rtype))
4202 new_temp = make_ssa_name (vec_dest, new_call);
4203 else
4204 new_temp = make_ssa_name (rtype, new_call);
4205 gimple_call_set_lhs (new_call, new_temp);
4207 stmt_vec_info new_stmt_info
4208 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4210 if (vec_dest)
4212 if (simd_clone_subparts (vectype) < nunits)
4214 unsigned int k, l;
4215 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4216 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4217 k = nunits / simd_clone_subparts (vectype);
4218 gcc_assert ((k & (k - 1)) == 0);
4219 for (l = 0; l < k; l++)
4221 tree t;
4222 if (ratype)
4224 t = build_fold_addr_expr (new_temp);
4225 t = build2 (MEM_REF, vectype, t,
4226 build_int_cst (TREE_TYPE (t), l * bytes));
4228 else
4229 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4230 bitsize_int (prec), bitsize_int (l * prec));
4231 gimple *new_stmt
4232 = gimple_build_assign (make_ssa_name (vectype), t);
4233 new_stmt_info
4234 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4236 if (j == 0 && l == 0)
4237 STMT_VINFO_VEC_STMT (stmt_info)
4238 = *vec_stmt = new_stmt_info;
4239 else
4240 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4242 prev_stmt_info = new_stmt_info;
4245 if (ratype)
4246 vect_clobber_variable (stmt_info, gsi, new_temp);
4247 continue;
4249 else if (simd_clone_subparts (vectype) > nunits)
4251 unsigned int k = (simd_clone_subparts (vectype)
4252 / simd_clone_subparts (rtype));
4253 gcc_assert ((k & (k - 1)) == 0);
4254 if ((j & (k - 1)) == 0)
4255 vec_alloc (ret_ctor_elts, k);
4256 if (ratype)
4258 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4259 for (m = 0; m < o; m++)
4261 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4262 size_int (m), NULL_TREE, NULL_TREE);
4263 gimple *new_stmt
4264 = gimple_build_assign (make_ssa_name (rtype), tem);
4265 new_stmt_info
4266 = vect_finish_stmt_generation (stmt_info, new_stmt,
4267 gsi);
4268 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4269 gimple_assign_lhs (new_stmt));
4271 vect_clobber_variable (stmt_info, gsi, new_temp);
4273 else
4274 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4275 if ((j & (k - 1)) != k - 1)
4276 continue;
4277 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4278 gimple *new_stmt
4279 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4280 new_stmt_info
4281 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4283 if ((unsigned) j == k - 1)
4284 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4285 else
4286 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4288 prev_stmt_info = new_stmt_info;
4289 continue;
4291 else if (ratype)
4293 tree t = build_fold_addr_expr (new_temp);
4294 t = build2 (MEM_REF, vectype, t,
4295 build_int_cst (TREE_TYPE (t), 0));
4296 gimple *new_stmt
4297 = gimple_build_assign (make_ssa_name (vec_dest), t);
4298 new_stmt_info
4299 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4300 vect_clobber_variable (stmt_info, gsi, new_temp);
4304 if (j == 0)
4305 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4306 else
4307 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4309 prev_stmt_info = new_stmt_info;
4312 vargs.release ();
4314 /* The call in STMT might prevent it from being removed in dce.
4315 We cannot, however, remove it here, due to the way the ssa name
4316 it defines is mapped to the new definition. So just replace the
4317 rhs of the statement with something harmless. */
4319 if (slp_node)
4320 return true;
4322 gimple *new_stmt;
4323 if (scalar_dest)
4325 type = TREE_TYPE (scalar_dest);
4326 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4327 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4329 else
4330 new_stmt = gimple_build_nop ();
4331 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4332 unlink_stmt_vdef (stmt);
4334 return true;
4338 /* Function vect_gen_widened_results_half
4340 Create a vector stmt whose code, number of operands, and result
4341 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4342 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4343 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4344 needs to be created (DECL is a function-decl of a target builtin).
4345 STMT_INFO is the original scalar stmt that we are vectorizing. */
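/* For example (a sketch with arbitrarily chosen modes), a widening multiply
   whose inputs are V8HI vectors is generated in two halves, one call to this
   function per half:

       lo_17 = VEC_WIDEN_MULT_LO_EXPR <vect_a, vect_b>;   low V4SI half
       hi_18 = VEC_WIDEN_MULT_HI_EXPR <vect_a, vect_b>;   high V4SI half

   On targets that expose the operation only as a builtin, CODE is instead a
   CALL_EXPR to the target builtin DECL, as described above.  */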
4347 static gimple *
4348 vect_gen_widened_results_half (enum tree_code code,
4349 tree decl,
4350 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4351 tree vec_dest, gimple_stmt_iterator *gsi,
4352 stmt_vec_info stmt_info)
4354 gimple *new_stmt;
4355 tree new_temp;
4357 /* Generate half of the widened result: */
4358 if (code == CALL_EXPR)
4360 /* Target specific support */
4361 if (op_type == binary_op)
4362 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4363 else
4364 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4365 new_temp = make_ssa_name (vec_dest, new_stmt);
4366 gimple_call_set_lhs (new_stmt, new_temp);
4368 else
4370 /* Generic support */
4371 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4372 if (op_type != binary_op)
4373 vec_oprnd1 = NULL;
4374 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4375 new_temp = make_ssa_name (vec_dest, new_stmt);
4376 gimple_assign_set_lhs (new_stmt, new_temp);
4378 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4380 return new_stmt;
4384 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4385 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4386 containing the scalar operand), and for the rest we get a copy with
4387 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4388 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4389 The vectors are collected into VEC_OPRNDS. */
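/* For instance (a sketch), when called with MULTI_STEP_CVT == 1 this pushes
   four vector defs into VEC_OPRNDS: the def for the scalar *OPRND, a stmt
   copy of it, and, via the recursive call, two further stmt copies -- the
   four inputs a two-step narrowing needs to produce one result vector.  */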
4391 static void
4392 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4393 vec<tree> *vec_oprnds, int multi_step_cvt)
4395 vec_info *vinfo = stmt_info->vinfo;
4396 tree vec_oprnd;
4398 /* Get the first vector operand. */
4399 /* All the vector operands except the very first one (that is the scalar
4400 oprnd) are stmt copies. */
4401 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4402 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4403 else
4404 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4406 vec_oprnds->quick_push (vec_oprnd);
4408 /* Get second vector operand. */
4409 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4410 vec_oprnds->quick_push (vec_oprnd);
4412 *oprnd = vec_oprnd;
4414 /* For conversion in multiple steps, continue to get operands
4415 recursively. */
4416 if (multi_step_cvt)
4417 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4418 multi_step_cvt - 1);
4422 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4423 For multi-step conversions store the resulting vectors and call the function
4424 recursively. */
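/* As a sketch (modes chosen arbitrarily), one step of demotion packs each
   pair of input vectors into a single narrower vector, e.g.

       vect_res_1 = VEC_PACK_TRUNC_EXPR <vect_in_0, vect_in_1>;
       vect_res_2 = VEC_PACK_TRUNC_EXPR <vect_in_2, vect_in_3>;

   halving the number of vectors; for a multi-step conversion the results are
   fed back into this function for the next, still narrower, step.  */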
4426 static void
4427 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4428 int multi_step_cvt,
4429 stmt_vec_info stmt_info,
4430 vec<tree> vec_dsts,
4431 gimple_stmt_iterator *gsi,
4432 slp_tree slp_node, enum tree_code code,
4433 stmt_vec_info *prev_stmt_info)
4435 unsigned int i;
4436 tree vop0, vop1, new_tmp, vec_dest;
4438 vec_dest = vec_dsts.pop ();
4440 for (i = 0; i < vec_oprnds->length (); i += 2)
4442 /* Create demotion operation. */
4443 vop0 = (*vec_oprnds)[i];
4444 vop1 = (*vec_oprnds)[i + 1];
4445 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4446 new_tmp = make_ssa_name (vec_dest, new_stmt);
4447 gimple_assign_set_lhs (new_stmt, new_tmp);
4448 stmt_vec_info new_stmt_info
4449 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4451 if (multi_step_cvt)
4452 /* Store the resulting vector for next recursive call. */
4453 (*vec_oprnds)[i/2] = new_tmp;
4454 else
4456 /* This is the last step of the conversion sequence. Store the
4457 vectors in SLP_NODE or in the vector info of the scalar statement
4458 (or in the STMT_VINFO_RELATED_STMT chain). */
4459 if (slp_node)
4460 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4461 else
4463 if (!*prev_stmt_info)
4464 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4465 else
4466 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4468 *prev_stmt_info = new_stmt_info;
4473 /* For multi-step demotion operations we first generate demotion operations
4474 from the source type to the intermediate types, and then combine the
4475 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4476 type. */
4477 if (multi_step_cvt)
4479 /* At each level of recursion we have half of the operands we had at the
4480 previous level. */
4481 vec_oprnds->truncate ((i+1)/2);
4482 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4483 stmt_info, vec_dsts, gsi,
4484 slp_node, VEC_PACK_TRUNC_EXPR,
4485 prev_stmt_info);
4488 vec_dsts.quick_push (vec_dest);
4492 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4493 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4494 STMT_INFO. For multi-step conversions store the resulting vectors and
4495 call the function recursively. */
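/* As a sketch (modes chosen arbitrarily), one step of promotion turns each
   input vector (or pair of vectors, for a binary operation) into two wider
   result vectors, e.g. for a unary widening:

       vect_lo_1 = VEC_UNPACK_LO_EXPR <vect_in>;
       vect_hi_2 = VEC_UNPACK_HI_EXPR <vect_in>;

   doubling the number of vectors held in *VEC_OPRNDS0 for the next step.  */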
4497 static void
4498 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4499 vec<tree> *vec_oprnds1,
4500 stmt_vec_info stmt_info, tree vec_dest,
4501 gimple_stmt_iterator *gsi,
4502 enum tree_code code1,
4503 enum tree_code code2, tree decl1,
4504 tree decl2, int op_type)
4506 int i;
4507 tree vop0, vop1, new_tmp1, new_tmp2;
4508 gimple *new_stmt1, *new_stmt2;
4509 vec<tree> vec_tmp = vNULL;
4511 vec_tmp.create (vec_oprnds0->length () * 2);
4512 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4514 if (op_type == binary_op)
4515 vop1 = (*vec_oprnds1)[i];
4516 else
4517 vop1 = NULL_TREE;
4519 /* Generate the two halves of the promotion operation. */
4520 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4521 op_type, vec_dest, gsi,
4522 stmt_info);
4523 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4524 op_type, vec_dest, gsi,
4525 stmt_info);
4526 if (is_gimple_call (new_stmt1))
4528 new_tmp1 = gimple_call_lhs (new_stmt1);
4529 new_tmp2 = gimple_call_lhs (new_stmt2);
4531 else
4533 new_tmp1 = gimple_assign_lhs (new_stmt1);
4534 new_tmp2 = gimple_assign_lhs (new_stmt2);
4537 /* Store the results for the next step. */
4538 vec_tmp.quick_push (new_tmp1);
4539 vec_tmp.quick_push (new_tmp2);
4542 vec_oprnds0->release ();
4543 *vec_oprnds0 = vec_tmp;
4547 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4548 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4549 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4550 Return true if STMT_INFO is vectorizable in this way. */
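/* A sketch of the three MODIFIER cases handled below, assuming 128-bit
   vectors (the actual modes are target-dependent):

       NONE:   int   -> float   one V4SI in, one V4SF out per copy
       WIDEN:  short -> int     one V8HI in, two V4SI out
       NARROW: int   -> short   two V4SI in, one V8HI out

   A conversion that cannot be done in a single step, e.g. char -> float,
   additionally goes through the intermediate types recorded in INTERM_TYPES
   (MULTI_STEP_CVT extra steps).  */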
4552 static bool
4553 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4554 stmt_vec_info *vec_stmt, slp_tree slp_node,
4555 stmt_vector_for_cost *cost_vec)
4557 tree vec_dest;
4558 tree scalar_dest;
4559 tree op0, op1 = NULL_TREE;
4560 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4561 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4562 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4563 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4564 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4565 tree new_temp;
4566 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4567 int ndts = 2;
4568 stmt_vec_info prev_stmt_info;
4569 poly_uint64 nunits_in;
4570 poly_uint64 nunits_out;
4571 tree vectype_out, vectype_in;
4572 int ncopies, i, j;
4573 tree lhs_type, rhs_type;
4574 enum { NARROW, NONE, WIDEN } modifier;
4575 vec<tree> vec_oprnds0 = vNULL;
4576 vec<tree> vec_oprnds1 = vNULL;
4577 tree vop0;
4578 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4579 vec_info *vinfo = stmt_info->vinfo;
4580 int multi_step_cvt = 0;
4581 vec<tree> interm_types = vNULL;
4582 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4583 int op_type;
4584 unsigned short fltsz;
4586 /* Is STMT a vectorizable conversion? */
4588 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4589 return false;
4591 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4592 && ! vec_stmt)
4593 return false;
4595 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4596 if (!stmt)
4597 return false;
4599 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4600 return false;
4602 code = gimple_assign_rhs_code (stmt);
4603 if (!CONVERT_EXPR_CODE_P (code)
4604 && code != FIX_TRUNC_EXPR
4605 && code != FLOAT_EXPR
4606 && code != WIDEN_MULT_EXPR
4607 && code != WIDEN_LSHIFT_EXPR)
4608 return false;
4610 op_type = TREE_CODE_LENGTH (code);
4612 /* Check types of lhs and rhs. */
4613 scalar_dest = gimple_assign_lhs (stmt);
4614 lhs_type = TREE_TYPE (scalar_dest);
4615 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4617 op0 = gimple_assign_rhs1 (stmt);
4618 rhs_type = TREE_TYPE (op0);
4620 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4621 && !((INTEGRAL_TYPE_P (lhs_type)
4622 && INTEGRAL_TYPE_P (rhs_type))
4623 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4624 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4625 return false;
4627 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4628 && ((INTEGRAL_TYPE_P (lhs_type)
4629 && !type_has_mode_precision_p (lhs_type))
4630 || (INTEGRAL_TYPE_P (rhs_type)
4631 && !type_has_mode_precision_p (rhs_type))))
4633 if (dump_enabled_p ())
4634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4635 "type conversion to/from bit-precision unsupported."
4636 "\n");
4637 return false;
4640 /* Check the operands of the operation. */
4641 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4643 if (dump_enabled_p ())
4644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4645 "use not simple.\n");
4646 return false;
4648 if (op_type == binary_op)
4650 bool ok;
4652 op1 = gimple_assign_rhs2 (stmt);
4653 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4654 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4655 OP1. */
4656 if (CONSTANT_CLASS_P (op0))
4657 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4658 else
4659 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4661 if (!ok)
4663 if (dump_enabled_p ())
4664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4665 "use not simple.\n");
4666 return false;
4670 /* If op0 is an external or constant def, use a vector type of
4671 the same size as the output vector type. */
4672 if (!vectype_in)
4673 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4674 if (vec_stmt)
4675 gcc_assert (vectype_in);
4676 if (!vectype_in)
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4680 "no vectype for scalar type %T\n", rhs_type);
4682 return false;
4685 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4686 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4690 "can't convert between boolean and non "
4691 "boolean vectors %T\n", rhs_type);
4693 return false;
4696 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4697 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4698 if (known_eq (nunits_out, nunits_in))
4699 modifier = NONE;
4700 else if (multiple_p (nunits_out, nunits_in))
4701 modifier = NARROW;
4702 else
4704 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4705 modifier = WIDEN;
4708 /* Multiple types in SLP are handled by creating the appropriate number of
4709 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4710 case of SLP. */
4711 if (slp_node)
4712 ncopies = 1;
4713 else if (modifier == NARROW)
4714 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4715 else
4716 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4718 /* Sanity check: make sure that at least one copy of the vectorized stmt
4719 needs to be generated. */
4720 gcc_assert (ncopies >= 1);
4722 bool found_mode = false;
4723 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4724 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4725 opt_scalar_mode rhs_mode_iter;
4727 /* Supportable by target? */
4728 switch (modifier)
4730 case NONE:
4731 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4732 return false;
4733 if (supportable_convert_operation (code, vectype_out, vectype_in,
4734 &decl1, &code1))
4735 break;
4736 /* FALLTHRU */
4737 unsupported:
4738 if (dump_enabled_p ())
4739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4740 "conversion not supported by target.\n");
4741 return false;
4743 case WIDEN:
4744 if (supportable_widening_operation (code, stmt_info, vectype_out,
4745 vectype_in, &code1, &code2,
4746 &multi_step_cvt, &interm_types))
4748 /* A binary widening operation can only be supported directly by the
4749 architecture. */
4750 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4751 break;
4754 if (code != FLOAT_EXPR
4755 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4756 goto unsupported;
4758 fltsz = GET_MODE_SIZE (lhs_mode);
4759 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4761 rhs_mode = rhs_mode_iter.require ();
4762 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4763 break;
4765 cvt_type
4766 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4767 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4768 if (cvt_type == NULL_TREE)
4769 goto unsupported;
4771 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4773 if (!supportable_convert_operation (code, vectype_out,
4774 cvt_type, &decl1, &codecvt1))
4775 goto unsupported;
4777 else if (!supportable_widening_operation (code, stmt_info,
4778 vectype_out, cvt_type,
4779 &codecvt1, &codecvt2,
4780 &multi_step_cvt,
4781 &interm_types))
4782 continue;
4783 else
4784 gcc_assert (multi_step_cvt == 0);
4786 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4787 vectype_in, &code1, &code2,
4788 &multi_step_cvt, &interm_types))
4790 found_mode = true;
4791 break;
4795 if (!found_mode)
4796 goto unsupported;
4798 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4799 codecvt2 = ERROR_MARK;
4800 else
4802 multi_step_cvt++;
4803 interm_types.safe_push (cvt_type);
4804 cvt_type = NULL_TREE;
4806 break;
4808 case NARROW:
4809 gcc_assert (op_type == unary_op);
4810 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4811 &code1, &multi_step_cvt,
4812 &interm_types))
4813 break;
4815 if (code != FIX_TRUNC_EXPR
4816 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4817 goto unsupported;
4819 cvt_type
4820 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4821 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4822 if (cvt_type == NULL_TREE)
4823 goto unsupported;
4824 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4825 &decl1, &codecvt1))
4826 goto unsupported;
4827 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4828 &code1, &multi_step_cvt,
4829 &interm_types))
4830 break;
4831 goto unsupported;
4833 default:
4834 gcc_unreachable ();
4837 if (!vec_stmt) /* transformation not required. */
4839 DUMP_VECT_SCOPE ("vectorizable_conversion");
4840 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4842 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4843 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4844 cost_vec);
4846 else if (modifier == NARROW)
4848 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4849 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4850 cost_vec);
4852 else
4854 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4855 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4856 cost_vec);
4858 interm_types.release ();
4859 return true;
4862 /* Transform. */
4863 if (dump_enabled_p ())
4864 dump_printf_loc (MSG_NOTE, vect_location,
4865 "transform conversion. ncopies = %d.\n", ncopies);
4867 if (op_type == binary_op)
4869 if (CONSTANT_CLASS_P (op0))
4870 op0 = fold_convert (TREE_TYPE (op1), op0);
4871 else if (CONSTANT_CLASS_P (op1))
4872 op1 = fold_convert (TREE_TYPE (op0), op1);
4875 /* In case of a multi-step conversion, we first generate conversion operations
4876 to the intermediate types, and then from those types to the final one.
4877 We create vector destinations for the intermediate types (TYPES) received
4878 from supportable_*_operation, and store them in the correct order
4879 for future use in vect_create_vectorized_*_stmts (). */
4880 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4881 vec_dest = vect_create_destination_var (scalar_dest,
4882 (cvt_type && modifier == WIDEN)
4883 ? cvt_type : vectype_out);
4884 vec_dsts.quick_push (vec_dest);
4886 if (multi_step_cvt)
4888 for (i = interm_types.length () - 1;
4889 interm_types.iterate (i, &intermediate_type); i--)
4891 vec_dest = vect_create_destination_var (scalar_dest,
4892 intermediate_type);
4893 vec_dsts.quick_push (vec_dest);
4897 if (cvt_type)
4898 vec_dest = vect_create_destination_var (scalar_dest,
4899 modifier == WIDEN
4900 ? vectype_out : cvt_type);
4902 if (!slp_node)
4904 if (modifier == WIDEN)
4906 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4907 if (op_type == binary_op)
4908 vec_oprnds1.create (1);
4910 else if (modifier == NARROW)
4911 vec_oprnds0.create (
4912 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4914 else if (code == WIDEN_LSHIFT_EXPR)
4915 vec_oprnds1.create (slp_node->vec_stmts_size);
4917 last_oprnd = op0;
4918 prev_stmt_info = NULL;
4919 switch (modifier)
4921 case NONE:
4922 for (j = 0; j < ncopies; j++)
4924 if (j == 0)
4925 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
4926 NULL, slp_node);
4927 else
4928 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
4930 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4932 stmt_vec_info new_stmt_info;
4933 /* Arguments are ready. Create the new vector stmt. */
4934 if (code1 == CALL_EXPR)
4936 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4937 new_temp = make_ssa_name (vec_dest, new_stmt);
4938 gimple_call_set_lhs (new_stmt, new_temp);
4939 new_stmt_info
4940 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4942 else
4944 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4945 gassign *new_stmt
4946 = gimple_build_assign (vec_dest, code1, vop0);
4947 new_temp = make_ssa_name (vec_dest, new_stmt);
4948 gimple_assign_set_lhs (new_stmt, new_temp);
4949 new_stmt_info
4950 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4953 if (slp_node)
4954 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4955 else
4957 if (!prev_stmt_info)
4958 STMT_VINFO_VEC_STMT (stmt_info)
4959 = *vec_stmt = new_stmt_info;
4960 else
4961 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4962 prev_stmt_info = new_stmt_info;
4966 break;
4968 case WIDEN:
4969 /* In case the vectorization factor (VF) is bigger than the number
4970 of elements that we can fit in a vectype (nunits), we have to
4971 generate more than one vector stmt, i.e. we need to "unroll"
4972 the vector stmt by a factor of VF/nunits. */
4973 for (j = 0; j < ncopies; j++)
4975 /* Handle uses. */
4976 if (j == 0)
4978 if (slp_node)
4980 if (code == WIDEN_LSHIFT_EXPR)
4982 unsigned int k;
4984 vec_oprnd1 = op1;
4985 /* Store vec_oprnd1 for every vector stmt to be created
4986 for SLP_NODE. We check during the analysis that all
4987 the shift arguments are the same. */
4988 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4989 vec_oprnds1.quick_push (vec_oprnd1);
4991 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
4992 &vec_oprnds0, NULL, slp_node);
4994 else
4995 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
4996 &vec_oprnds1, slp_node);
4998 else
5000 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5001 vec_oprnds0.quick_push (vec_oprnd0);
5002 if (op_type == binary_op)
5004 if (code == WIDEN_LSHIFT_EXPR)
5005 vec_oprnd1 = op1;
5006 else
5007 vec_oprnd1
5008 = vect_get_vec_def_for_operand (op1, stmt_info);
5009 vec_oprnds1.quick_push (vec_oprnd1);
5013 else
5015 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5016 vec_oprnds0.truncate (0);
5017 vec_oprnds0.quick_push (vec_oprnd0);
5018 if (op_type == binary_op)
5020 if (code == WIDEN_LSHIFT_EXPR)
5021 vec_oprnd1 = op1;
5022 else
5023 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5024 vec_oprnd1);
5025 vec_oprnds1.truncate (0);
5026 vec_oprnds1.quick_push (vec_oprnd1);
5030 /* Arguments are ready. Create the new vector stmts. */
5031 for (i = multi_step_cvt; i >= 0; i--)
5033 tree this_dest = vec_dsts[i];
5034 enum tree_code c1 = code1, c2 = code2;
5035 if (i == 0 && codecvt2 != ERROR_MARK)
5037 c1 = codecvt1;
5038 c2 = codecvt2;
5040 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5041 &vec_oprnds1, stmt_info,
5042 this_dest, gsi,
5043 c1, c2, decl1, decl2,
5044 op_type);
5047 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5049 stmt_vec_info new_stmt_info;
5050 if (cvt_type)
5052 if (codecvt1 == CALL_EXPR)
5054 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5055 new_temp = make_ssa_name (vec_dest, new_stmt);
5056 gimple_call_set_lhs (new_stmt, new_temp);
5057 new_stmt_info
5058 = vect_finish_stmt_generation (stmt_info, new_stmt,
5059 gsi);
5061 else
5063 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5064 new_temp = make_ssa_name (vec_dest);
5065 gassign *new_stmt
5066 = gimple_build_assign (new_temp, codecvt1, vop0);
5067 new_stmt_info
5068 = vect_finish_stmt_generation (stmt_info, new_stmt,
5069 gsi);
5072 else
5073 new_stmt_info = vinfo->lookup_def (vop0);
5075 if (slp_node)
5076 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5077 else
5079 if (!prev_stmt_info)
5080 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5081 else
5082 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5083 prev_stmt_info = new_stmt_info;
5088 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5089 break;
5091 case NARROW:
5092 /* In case the vectorization factor (VF) is bigger than the number
5093 of elements that we can fit in a vectype (nunits), we have to
5094 generate more than one vector stmt, i.e. we need to "unroll"
5095 the vector stmt by a factor of VF/nunits. */
5096 for (j = 0; j < ncopies; j++)
5098 /* Handle uses. */
5099 if (slp_node)
5100 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5101 slp_node);
5102 else
5104 vec_oprnds0.truncate (0);
5105 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5106 vect_pow2 (multi_step_cvt) - 1);
5109 /* Arguments are ready. Create the new vector stmts. */
5110 if (cvt_type)
5111 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5113 if (codecvt1 == CALL_EXPR)
5115 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5116 new_temp = make_ssa_name (vec_dest, new_stmt);
5117 gimple_call_set_lhs (new_stmt, new_temp);
5118 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5120 else
5122 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5123 new_temp = make_ssa_name (vec_dest);
5124 gassign *new_stmt
5125 = gimple_build_assign (new_temp, codecvt1, vop0);
5126 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5129 vec_oprnds0[i] = new_temp;
5132 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5133 stmt_info, vec_dsts, gsi,
5134 slp_node, code1,
5135 &prev_stmt_info);
5138 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5139 break;
5142 vec_oprnds0.release ();
5143 vec_oprnds1.release ();
5144 interm_types.release ();
5146 return true;
5150 /* Function vectorizable_assignment.
5152 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5153 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5154 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5155 Return true if STMT_INFO is vectorizable in this way. */
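/* For example (a sketch), a copy or a same-width conversion such as

       int_x = (int) unsigned_y;

   is vectorized as a single vector copy per copy of the stmt, with the
   operand wrapped in a VIEW_CONVERT_EXPR when the scalar code was a
   conversion:

       vect_x = VIEW_CONVERT_EXPR<vector(4) int>(vect_y);  */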
5157 static bool
5158 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5159 stmt_vec_info *vec_stmt, slp_tree slp_node,
5160 stmt_vector_for_cost *cost_vec)
5162 tree vec_dest;
5163 tree scalar_dest;
5164 tree op;
5165 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5166 tree new_temp;
5167 enum vect_def_type dt[1] = {vect_unknown_def_type};
5168 int ndts = 1;
5169 int ncopies;
5170 int i, j;
5171 vec<tree> vec_oprnds = vNULL;
5172 tree vop;
5173 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5174 vec_info *vinfo = stmt_info->vinfo;
5175 stmt_vec_info prev_stmt_info = NULL;
5176 enum tree_code code;
5177 tree vectype_in;
5179 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5180 return false;
5182 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5183 && ! vec_stmt)
5184 return false;
5186 /* Is vectorizable assignment? */
5187 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5188 if (!stmt)
5189 return false;
5191 scalar_dest = gimple_assign_lhs (stmt);
5192 if (TREE_CODE (scalar_dest) != SSA_NAME)
5193 return false;
5195 code = gimple_assign_rhs_code (stmt);
5196 if (gimple_assign_single_p (stmt)
5197 || code == PAREN_EXPR
5198 || CONVERT_EXPR_CODE_P (code))
5199 op = gimple_assign_rhs1 (stmt);
5200 else
5201 return false;
5203 if (code == VIEW_CONVERT_EXPR)
5204 op = TREE_OPERAND (op, 0);
5206 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5207 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5209 /* Multiple types in SLP are handled by creating the appropriate number of
5210 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5211 case of SLP. */
5212 if (slp_node)
5213 ncopies = 1;
5214 else
5215 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5217 gcc_assert (ncopies >= 1);
5219 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5221 if (dump_enabled_p ())
5222 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5223 "use not simple.\n");
5224 return false;
5227 /* We can handle NOP_EXPR conversions that do not change the number
5228 of elements or the vector size. */
5229 if ((CONVERT_EXPR_CODE_P (code)
5230 || code == VIEW_CONVERT_EXPR)
5231 && (!vectype_in
5232 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5233 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5234 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5235 return false;
5237 /* We do not handle bit-precision changes. */
5238 if ((CONVERT_EXPR_CODE_P (code)
5239 || code == VIEW_CONVERT_EXPR)
5240 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5241 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5242 || !type_has_mode_precision_p (TREE_TYPE (op)))
5243 /* But a conversion that does not change the bit-pattern is ok. */
5244 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5245 > TYPE_PRECISION (TREE_TYPE (op)))
5246 && TYPE_UNSIGNED (TREE_TYPE (op)))
5247 /* Conversion between boolean types of different sizes is
5248 a simple assignment in case their vectypes are the same
5249 boolean vectors. */
5250 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5251 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5253 if (dump_enabled_p ())
5254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5255 "type conversion to/from bit-precision "
5256 "unsupported.\n");
5257 return false;
5260 if (!vec_stmt) /* transformation not required. */
5262 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5263 DUMP_VECT_SCOPE ("vectorizable_assignment");
5264 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5265 return true;
5268 /* Transform. */
5269 if (dump_enabled_p ())
5270 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5272 /* Handle def. */
5273 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5275 /* Handle use. */
5276 for (j = 0; j < ncopies; j++)
5278 /* Handle uses. */
5279 if (j == 0)
5280 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5281 else
5282 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5284 /* Arguments are ready. Create the new vector stmt. */
5285 stmt_vec_info new_stmt_info = NULL;
5286 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5288 if (CONVERT_EXPR_CODE_P (code)
5289 || code == VIEW_CONVERT_EXPR)
5290 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5291 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5292 new_temp = make_ssa_name (vec_dest, new_stmt);
5293 gimple_assign_set_lhs (new_stmt, new_temp);
5294 new_stmt_info
5295 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5296 if (slp_node)
5297 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5300 if (slp_node)
5301 continue;
5303 if (j == 0)
5304 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5305 else
5306 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5308 prev_stmt_info = new_stmt_info;
5311 vec_oprnds.release ();
5312 return true;
5316 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5317 either as a shift by a scalar or by a vector. */
5319 bool
5320 vect_supportable_shift (enum tree_code code, tree scalar_type)
5323 machine_mode vec_mode;
5324 optab optab;
5325 int icode;
5326 tree vectype;
5328 vectype = get_vectype_for_scalar_type (scalar_type);
5329 if (!vectype)
5330 return false;
5332 optab = optab_for_tree_code (code, vectype, optab_scalar);
5333 if (!optab
5334 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5336 optab = optab_for_tree_code (code, vectype, optab_vector);
5337 if (!optab
5338 || (optab_handler (optab, TYPE_MODE (vectype))
5339 == CODE_FOR_nothing))
5340 return false;
5343 vec_mode = TYPE_MODE (vectype);
5344 icode = (int) optab_handler (optab, vec_mode);
5345 if (icode == CODE_FOR_nothing)
5346 return false;
5348 return true;
5352 /* Function vectorizable_shift.
5354 Check if STMT_INFO performs a shift operation that can be vectorized.
5355 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5356 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5357 Return true if STMT_INFO is vectorizable in this way. */
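/* A sketch of the two forms handled below:

       x[i] << 3      every lane shifted by the same scalar amount
       x[i] << y[i]   each lane shifted by its own amount (vector shift)

   A constant or loop-invariant amount is kept scalar when the target's
   vector/scalar shift optab supports it; otherwise the amount is duplicated
   into a vector and the vector/vector optab is used.  */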
5359 bool
5360 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5361 stmt_vec_info *vec_stmt, slp_tree slp_node,
5362 stmt_vector_for_cost *cost_vec)
5364 tree vec_dest;
5365 tree scalar_dest;
5366 tree op0, op1 = NULL;
5367 tree vec_oprnd1 = NULL_TREE;
5368 tree vectype;
5369 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5370 enum tree_code code;
5371 machine_mode vec_mode;
5372 tree new_temp;
5373 optab optab;
5374 int icode;
5375 machine_mode optab_op2_mode;
5376 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5377 int ndts = 2;
5378 stmt_vec_info prev_stmt_info;
5379 poly_uint64 nunits_in;
5380 poly_uint64 nunits_out;
5381 tree vectype_out;
5382 tree op1_vectype;
5383 int ncopies;
5384 int j, i;
5385 vec<tree> vec_oprnds0 = vNULL;
5386 vec<tree> vec_oprnds1 = vNULL;
5387 tree vop0, vop1;
5388 unsigned int k;
5389 bool scalar_shift_arg = true;
5390 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5391 vec_info *vinfo = stmt_info->vinfo;
5393 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5394 return false;
5396 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5397 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5398 && ! vec_stmt)
5399 return false;
5401 /* Is STMT a vectorizable binary/unary operation? */
5402 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5403 if (!stmt)
5404 return false;
5406 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5407 return false;
5409 code = gimple_assign_rhs_code (stmt);
5411 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5412 || code == RROTATE_EXPR))
5413 return false;
5415 scalar_dest = gimple_assign_lhs (stmt);
5416 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5417 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5419 if (dump_enabled_p ())
5420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5421 "bit-precision shifts not supported.\n");
5422 return false;
5425 op0 = gimple_assign_rhs1 (stmt);
5426 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5428 if (dump_enabled_p ())
5429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5430 "use not simple.\n");
5431 return false;
5433 /* If op0 is an external or constant def use a vector type with
5434 the same size as the output vector type. */
5435 if (!vectype)
5436 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5437 if (vec_stmt)
5438 gcc_assert (vectype);
5439 if (!vectype)
5441 if (dump_enabled_p ())
5442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5443 "no vectype for scalar type\n");
5444 return false;
5447 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5448 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5449 if (maybe_ne (nunits_out, nunits_in))
5450 return false;
5452 op1 = gimple_assign_rhs2 (stmt);
5453 stmt_vec_info op1_def_stmt_info;
5454 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5455 &op1_def_stmt_info))
5457 if (dump_enabled_p ())
5458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5459 "use not simple.\n");
5460 return false;
5463 /* Multiple types in SLP are handled by creating the appropriate number of
5464 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5465 case of SLP. */
5466 if (slp_node)
5467 ncopies = 1;
5468 else
5469 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5471 gcc_assert (ncopies >= 1);
5473 /* Determine whether the shift amount is a vector or a scalar.  If the
5474 shift/rotate amount is a vector, use the vector/vector shift optabs. */
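/* The classification below is: a shift amount that is a loop-varying
   internal, induction or nested-cycle def (outside SLP) forces a
   vector/vector shift; a constant or external def, or an internal def
   in SLP, is a candidate for a vector/scalar shift provided all SLP
   statements use the same amount and the amount is not defined by a
   pattern statement; anything else cannot be vectorized here.  */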
5476 if ((dt[1] == vect_internal_def
5477 || dt[1] == vect_induction_def
5478 || dt[1] == vect_nested_cycle)
5479 && !slp_node)
5480 scalar_shift_arg = false;
5481 else if (dt[1] == vect_constant_def
5482 || dt[1] == vect_external_def
5483 || dt[1] == vect_internal_def)
5485 /* In SLP we need to check whether the shift count is the same for
5486 all statements; in loops, a constant or invariant shift count is
5487 always a scalar shift.  */
5488 if (slp_node)
5490 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5491 stmt_vec_info slpstmt_info;
5493 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5495 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5496 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5497 scalar_shift_arg = false;
5501 /* If the shift amount is computed by a pattern stmt we cannot
5502 use the scalar amount directly; give up and use a vector
5503 shift. */
5504 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5505 scalar_shift_arg = false;
5507 else
5509 if (dump_enabled_p ())
5510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5511 "operand mode requires invariant argument.\n");
5512 return false;
5515 /* Vector shifted by vector. */
5516 if (!scalar_shift_arg)
5518 optab = optab_for_tree_code (code, vectype, optab_vector);
5519 if (dump_enabled_p ())
5520 dump_printf_loc (MSG_NOTE, vect_location,
5521 "vector/vector shift/rotate found.\n");
5523 if (!op1_vectype)
5524 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5525 if (op1_vectype == NULL_TREE
5526 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5528 if (dump_enabled_p ())
5529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5530 "unusable type for last operand in"
5531 " vector/vector shift/rotate.\n");
5532 return false;
5535 /* See if the machine has a vector shifted by scalar insn and if not
5536 then see if it has a vector shifted by vector insn. */
5537 else
5539 optab = optab_for_tree_code (code, vectype, optab_scalar);
5540 if (optab
5541 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5543 if (dump_enabled_p ())
5544 dump_printf_loc (MSG_NOTE, vect_location,
5545 "vector/scalar shift/rotate found.\n");
5547 else
5549 optab = optab_for_tree_code (code, vectype, optab_vector);
5550 if (optab
5551 && (optab_handler (optab, TYPE_MODE (vectype))
5552 != CODE_FOR_nothing))
5554 scalar_shift_arg = false;
5556 if (dump_enabled_p ())
5557 dump_printf_loc (MSG_NOTE, vect_location,
5558 "vector/vector shift/rotate found.\n");
5560 /* Unlike the other binary operators, shifts/rotates have
5561 an rhs of type int rather than the same type as the lhs,
5562 so make sure the scalar count has the right type if we are
5563 dealing with vectors of long long/long/short/char. */
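/* For example, in "long long x = y << 3" the shift count 3 has type
   int; a constant count is folded to the vector element type here,
   while a non-constant invariant count is converted to the element
   type at transform time.  */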
5564 if (dt[1] == vect_constant_def)
5565 op1 = fold_convert (TREE_TYPE (vectype), op1);
5566 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5567 TREE_TYPE (op1)))
5569 if (slp_node
5570 && TYPE_MODE (TREE_TYPE (vectype))
5571 != TYPE_MODE (TREE_TYPE (op1)))
5573 if (dump_enabled_p ())
5574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5575 "unusable type for last operand in"
5576 " vector/vector shift/rotate.\n");
5577 return false;
5579 if (vec_stmt && !slp_node)
5581 op1 = fold_convert (TREE_TYPE (vectype), op1);
5582 op1 = vect_init_vector (stmt_info, op1,
5583 TREE_TYPE (vectype), NULL);
5590 /* Supportable by target? */
5591 if (!optab)
5593 if (dump_enabled_p ())
5594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5595 "no optab.\n");
5596 return false;
5598 vec_mode = TYPE_MODE (vectype);
5599 icode = (int) optab_handler (optab, vec_mode);
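/* If the target lacks an instruction for this shift we may still be
   able to vectorize using plain integer arithmetic on a word-sized
   chunk, but only when the vector is exactly one word wide and the
   operation is considered worthwhile without SIMD support.  */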
5600 if (icode == CODE_FOR_nothing)
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5604 "op not supported by target.\n");
5605 /* Check only during analysis. */
5606 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5607 || (!vec_stmt
5608 && !vect_worthwhile_without_simd_p (vinfo, code)))
5609 return false;
5610 if (dump_enabled_p ())
5611 dump_printf_loc (MSG_NOTE, vect_location,
5612 "proceeding using word mode.\n");
5615 /* Worthwhile without SIMD support? Check only during analysis. */
5616 if (!vec_stmt
5617 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5618 && !vect_worthwhile_without_simd_p (vinfo, code))
5620 if (dump_enabled_p ())
5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5622 "not worthwhile without SIMD support.\n");
5623 return false;
5626 if (!vec_stmt) /* transformation not required. */
5628 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5629 DUMP_VECT_SCOPE ("vectorizable_shift");
5630 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5631 return true;
5634 /* Transform. */
5636 if (dump_enabled_p ())
5637 dump_printf_loc (MSG_NOTE, vect_location,
5638 "transform binary/unary operation.\n");
5640 /* Handle def. */
5641 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5643 prev_stmt_info = NULL;
5644 for (j = 0; j < ncopies; j++)
5646 /* Handle uses. */
5647 if (j == 0)
5649 if (scalar_shift_arg)
5651 /* Vector shl and shr insn patterns can be defined with scalar
5652 operand 2 (shift operand). In this case, use constant or loop
5653 invariant op1 directly, without extending it to vector mode
5654 first. */
5655 optab_op2_mode = insn_data[icode].operand[2].mode;
5656 if (!VECTOR_MODE_P (optab_op2_mode))
5658 if (dump_enabled_p ())
5659 dump_printf_loc (MSG_NOTE, vect_location,
5660 "operand 1 using scalar mode.\n");
5661 vec_oprnd1 = op1;
5662 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5663 vec_oprnds1.quick_push (vec_oprnd1);
5664 if (slp_node)
5666 /* Store vec_oprnd1 for every vector stmt to be created
5667 for SLP_NODE. We check during the analysis that all
5668 the shift arguments are the same.
5669 TODO: Allow different constants for different vector
5670 stmts generated for an SLP instance. */
5671 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5672 vec_oprnds1.quick_push (vec_oprnd1);
5677 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5678 (a special case for certain kinds of vector shifts); otherwise,
5679 operand 1 should be of a vector type (the usual case). */
5680 if (vec_oprnd1)
5681 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5682 slp_node);
5683 else
5684 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5685 slp_node);
5687 else
5688 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5690 /* Arguments are ready. Create the new vector stmt. */
5691 stmt_vec_info new_stmt_info = NULL;
5692 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5694 vop1 = vec_oprnds1[i];
5695 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5696 new_temp = make_ssa_name (vec_dest, new_stmt);
5697 gimple_assign_set_lhs (new_stmt, new_temp);
5698 new_stmt_info
5699 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5700 if (slp_node)
5701 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5704 if (slp_node)
5705 continue;
5707 if (j == 0)
5708 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5709 else
5710 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5711 prev_stmt_info = new_stmt_info;
5714 vec_oprnds0.release ();
5715 vec_oprnds1.release ();
5717 return true;
5721 /* Function vectorizable_operation.
5723 Check if STMT_INFO performs a binary, unary or ternary operation that can
5724 be vectorized.
5725 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5726 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5727 Return true if STMT_INFO is vectorizable in this way. */
5729 static bool
5730 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5731 stmt_vec_info *vec_stmt, slp_tree slp_node,
5732 stmt_vector_for_cost *cost_vec)
5734 tree vec_dest;
5735 tree scalar_dest;
5736 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5737 tree vectype;
5738 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5739 enum tree_code code, orig_code;
5740 machine_mode vec_mode;
5741 tree new_temp;
5742 int op_type;
5743 optab optab;
5744 bool target_support_p;
5745 enum vect_def_type dt[3]
5746 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5747 int ndts = 3;
5748 stmt_vec_info prev_stmt_info;
5749 poly_uint64 nunits_in;
5750 poly_uint64 nunits_out;
5751 tree vectype_out;
5752 int ncopies;
5753 int j, i;
5754 vec<tree> vec_oprnds0 = vNULL;
5755 vec<tree> vec_oprnds1 = vNULL;
5756 vec<tree> vec_oprnds2 = vNULL;
5757 tree vop0, vop1, vop2;
5758 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5759 vec_info *vinfo = stmt_info->vinfo;
5761 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5762 return false;
5764 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5765 && ! vec_stmt)
5766 return false;
5768 /* Is STMT a vectorizable binary/unary operation? */
5769 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5770 if (!stmt)
5771 return false;
5773 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5774 return false;
5776 orig_code = code = gimple_assign_rhs_code (stmt);
5778 /* For pointer addition and subtraction, we should use the normal
5779 plus and minus for the vector operation. */
5780 if (code == POINTER_PLUS_EXPR)
5781 code = PLUS_EXPR;
5782 if (code == POINTER_DIFF_EXPR)
5783 code = MINUS_EXPR;
5785 /* Support only unary, binary or ternary operations.  */
5786 op_type = TREE_CODE_LENGTH (code);
5787 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5789 if (dump_enabled_p ())
5790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5791 "num. args = %d (not unary/binary/ternary op).\n",
5792 op_type);
5793 return false;
5796 scalar_dest = gimple_assign_lhs (stmt);
5797 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5799 /* Most operations cannot handle bit-precision types without extra
5800 truncations. */
5801 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5802 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5803 /* Exception are bitwise binary operations. */
5804 && code != BIT_IOR_EXPR
5805 && code != BIT_XOR_EXPR
5806 && code != BIT_AND_EXPR)
5808 if (dump_enabled_p ())
5809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5810 "bit-precision arithmetic not supported.\n");
5811 return false;
5814 op0 = gimple_assign_rhs1 (stmt);
5815 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5817 if (dump_enabled_p ())
5818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5819 "use not simple.\n");
5820 return false;
5822 /* If op0 is an external or constant def use a vector type with
5823 the same size as the output vector type. */
5824 if (!vectype)
5826 /* For boolean type we cannot determine vectype by
5827 invariant value (don't know whether it is a vector
5828 of booleans or vector of integers). We use output
5829 vectype because operations on boolean don't change
5830 type. */
5831 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5833 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5835 if (dump_enabled_p ())
5836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5837 "not supported operation on bool value.\n");
5838 return false;
5840 vectype = vectype_out;
5842 else
5843 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5845 if (vec_stmt)
5846 gcc_assert (vectype);
5847 if (!vectype)
5849 if (dump_enabled_p ())
5850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5851 "no vectype for scalar type %T\n",
5852 TREE_TYPE (op0));
5854 return false;
5857 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5858 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5859 if (maybe_ne (nunits_out, nunits_in))
5860 return false;
5862 if (op_type == binary_op || op_type == ternary_op)
5864 op1 = gimple_assign_rhs2 (stmt);
5865 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
5867 if (dump_enabled_p ())
5868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5869 "use not simple.\n");
5870 return false;
5873 if (op_type == ternary_op)
5875 op2 = gimple_assign_rhs3 (stmt);
5876 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
5878 if (dump_enabled_p ())
5879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5880 "use not simple.\n");
5881 return false;
5885 /* Multiple types in SLP are handled by creating the appropriate number of
5886 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5887 case of SLP. */
5888 if (slp_node)
5889 ncopies = 1;
5890 else
5891 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5893 gcc_assert (ncopies >= 1);
5895 /* Shifts are handled in vectorizable_shift (). */
5896 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5897 || code == RROTATE_EXPR)
5898 return false;
5900 /* Supportable by target? */
5902 vec_mode = TYPE_MODE (vectype);
5903 if (code == MULT_HIGHPART_EXPR)
5904 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5905 else
5907 optab = optab_for_tree_code (code, vectype, optab_default);
5908 if (!optab)
5910 if (dump_enabled_p ())
5911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5912 "no optab.\n");
5913 return false;
5915 target_support_p = (optab_handler (optab, vec_mode)
5916 != CODE_FOR_nothing);
5919 if (!target_support_p)
5921 if (dump_enabled_p ())
5922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5923 "op not supported by target.\n");
5924 /* Check only during analysis. */
5925 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5926 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5927 return false;
5928 if (dump_enabled_p ())
5929 dump_printf_loc (MSG_NOTE, vect_location,
5930 "proceeding using word mode.\n");
5933 /* Worthwhile without SIMD support? Check only during analysis. */
5934 if (!VECTOR_MODE_P (vec_mode)
5935 && !vec_stmt
5936 && !vect_worthwhile_without_simd_p (vinfo, code))
5938 if (dump_enabled_p ())
5939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5940 "not worthwhile without SIMD support.\n");
5941 return false;
5944 if (!vec_stmt) /* transformation not required. */
5946 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5947 DUMP_VECT_SCOPE ("vectorizable_operation");
5948 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5949 return true;
5952 /* Transform. */
5954 if (dump_enabled_p ())
5955 dump_printf_loc (MSG_NOTE, vect_location,
5956 "transform binary/unary operation.\n");
5958 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5959 vectors with unsigned elements, but the result is signed. So, we
5960 need to compute the MINUS_EXPR into vectype temporary and
5961 VIEW_CONVERT_EXPR it into the final vectype_out result. */
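/* When VEC_CVT_DEST is set, VEC_DEST holds the intermediate unsigned
   MINUS_EXPR result and VEC_CVT_DEST the final signed result produced
   by the VIEW_CONVERT_EXPR emitted below.  */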
5962 tree vec_cvt_dest = NULL_TREE;
5963 if (orig_code == POINTER_DIFF_EXPR)
5965 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5966 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5968 /* Handle def. */
5969 else
5970 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5972 /* In case the vectorization factor (VF) is bigger than the number
5973 of elements that we can fit in a vectype (nunits), we have to generate
5974 more than one vector stmt, i.e. we need to "unroll" the
5975 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5976 from one copy of the vector stmt to the next, in the field
5977 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5978 stages to find the correct vector defs to be used when vectorizing
5979 stmts that use the defs of the current stmt. The example below
5980 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5981 we need to create 4 vectorized stmts):
5983 before vectorization:
5984 RELATED_STMT VEC_STMT
5985 S1: x = memref - -
5986 S2: z = x + 1 - -
5988 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5989 there):
5990 RELATED_STMT VEC_STMT
5991 VS1_0: vx0 = memref0 VS1_1 -
5992 VS1_1: vx1 = memref1 VS1_2 -
5993 VS1_2: vx2 = memref2 VS1_3 -
5994 VS1_3: vx3 = memref3 - -
5995 S1: x = load - VS1_0
5996 S2: z = x + 1 - -
5998 step2: vectorize stmt S2 (done here):
5999 To vectorize stmt S2 we first need to find the relevant vector
6000 def for the first operand 'x'. This is, as usual, obtained from
6001 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6002 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6003 relevant vector def 'vx0'. Having found 'vx0' we can generate
6004 the vector stmt VS2_0, and as usual, record it in the
6005 STMT_VINFO_VEC_STMT of stmt S2.
6006 When creating the second copy (VS2_1), we obtain the relevant vector
6007 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6008 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6009 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6010 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6011 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6012 chain of stmts and pointers:
6013 RELATED_STMT VEC_STMT
6014 VS1_0: vx0 = memref0 VS1_1 -
6015 VS1_1: vx1 = memref1 VS1_2 -
6016 VS1_2: vx2 = memref2 VS1_3 -
6017 VS1_3: vx3 = memref3 - -
6018 S1: x = load - VS1_0
6019 VS2_0: vz0 = vx0 + v1 VS2_1 -
6020 VS2_1: vz1 = vx1 + v1 VS2_2 -
6021 VS2_2: vz2 = vx2 + v1 VS2_3 -
6022 VS2_3: vz3 = vx3 + v1 - -
6023 S2: z = x + 1 - VS2_0 */
6025 prev_stmt_info = NULL;
6026 for (j = 0; j < ncopies; j++)
6028 /* Handle uses. */
6029 if (j == 0)
6031 if (op_type == binary_op)
6032 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6033 slp_node);
6034 else if (op_type == ternary_op)
6036 if (slp_node)
6038 auto_vec<tree> ops(3);
6039 ops.quick_push (op0);
6040 ops.quick_push (op1);
6041 ops.quick_push (op2);
6042 auto_vec<vec<tree> > vec_defs(3);
6043 vect_get_slp_defs (ops, slp_node, &vec_defs);
6044 vec_oprnds0 = vec_defs[0];
6045 vec_oprnds1 = vec_defs[1];
6046 vec_oprnds2 = vec_defs[2];
6048 else
6050 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6051 &vec_oprnds1, NULL);
6052 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6053 NULL, NULL);
6056 else
6057 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6058 slp_node);
6060 else
6062 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6063 if (op_type == ternary_op)
6065 tree vec_oprnd = vec_oprnds2.pop ();
6066 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6067 vec_oprnd));
6071 /* Arguments are ready. Create the new vector stmt. */
6072 stmt_vec_info new_stmt_info = NULL;
6073 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6075 vop1 = ((op_type == binary_op || op_type == ternary_op)
6076 ? vec_oprnds1[i] : NULL_TREE);
6077 vop2 = ((op_type == ternary_op)
6078 ? vec_oprnds2[i] : NULL_TREE);
6079 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6080 vop0, vop1, vop2);
6081 new_temp = make_ssa_name (vec_dest, new_stmt);
6082 gimple_assign_set_lhs (new_stmt, new_temp);
6083 new_stmt_info
6084 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6085 if (vec_cvt_dest)
6087 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6088 gassign *new_stmt
6089 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6090 new_temp);
6091 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6092 gimple_assign_set_lhs (new_stmt, new_temp);
6093 new_stmt_info
6094 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6096 if (slp_node)
6097 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6100 if (slp_node)
6101 continue;
6103 if (j == 0)
6104 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6105 else
6106 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6107 prev_stmt_info = new_stmt_info;
6110 vec_oprnds0.release ();
6111 vec_oprnds1.release ();
6112 vec_oprnds2.release ();
6114 return true;
6117 /* A helper function to ensure data reference DR_INFO's base alignment. */
6119 static void
6120 ensure_base_align (dr_vec_info *dr_info)
6122 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6123 return;
6125 if (dr_info->base_misaligned)
6127 tree base_decl = dr_info->base_decl;
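/* DR_TARGET_ALIGNMENT is in bytes; DECL_ALIGN and increase_alignment
   work in bits, hence the scaling by BITS_PER_UNIT below.  */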
6129 unsigned int align_base_to
6130 = DR_TARGET_ALIGNMENT (dr_info) * BITS_PER_UNIT;
6132 if (decl_in_symtab_p (base_decl))
6133 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6134 else
6136 SET_DECL_ALIGN (base_decl, align_base_to);
6137 DECL_USER_ALIGN (base_decl) = 1;
6139 dr_info->base_misaligned = false;
6144 /* Function get_group_alias_ptr_type.
6146 Return the alias type for the group starting at FIRST_STMT_INFO. */
6148 static tree
6149 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6151 struct data_reference *first_dr, *next_dr;
6153 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6154 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6155 while (next_stmt_info)
6157 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6158 if (get_alias_set (DR_REF (first_dr))
6159 != get_alias_set (DR_REF (next_dr)))
6161 if (dump_enabled_p ())
6162 dump_printf_loc (MSG_NOTE, vect_location,
6163 "conflicting alias set types.\n");
6164 return ptr_type_node;
6166 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6168 return reference_alias_ptr_type (DR_REF (first_dr));
6172 /* Function vectorizable_store.
6174 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
6175 that can be vectorized.
6176 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6177 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6178 Return true if STMT_INFO is vectorizable in this way. */
6180 static bool
6181 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6182 stmt_vec_info *vec_stmt, slp_tree slp_node,
6183 stmt_vector_for_cost *cost_vec)
6185 tree data_ref;
6186 tree op;
6187 tree vec_oprnd = NULL_TREE;
6188 tree elem_type;
6189 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6190 struct loop *loop = NULL;
6191 machine_mode vec_mode;
6192 tree dummy;
6193 enum dr_alignment_support alignment_support_scheme;
6194 enum vect_def_type rhs_dt = vect_unknown_def_type;
6195 enum vect_def_type mask_dt = vect_unknown_def_type;
6196 stmt_vec_info prev_stmt_info = NULL;
6197 tree dataref_ptr = NULL_TREE;
6198 tree dataref_offset = NULL_TREE;
6199 gimple *ptr_incr = NULL;
6200 int ncopies;
6201 int j;
6202 stmt_vec_info first_stmt_info;
6203 bool grouped_store;
6204 unsigned int group_size, i;
6205 vec<tree> oprnds = vNULL;
6206 vec<tree> result_chain = vNULL;
6207 tree offset = NULL_TREE;
6208 vec<tree> vec_oprnds = vNULL;
6209 bool slp = (slp_node != NULL);
6210 unsigned int vec_num;
6211 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6212 vec_info *vinfo = stmt_info->vinfo;
6213 tree aggr_type;
6214 gather_scatter_info gs_info;
6215 poly_uint64 vf;
6216 vec_load_store_type vls_type;
6217 tree ref_type;
6219 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6220 return false;
6222 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6223 && ! vec_stmt)
6224 return false;
6226 /* Is this a vectorizable store?  */
6228 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6229 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6231 tree scalar_dest = gimple_assign_lhs (assign);
6232 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6233 && is_pattern_stmt_p (stmt_info))
6234 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6235 if (TREE_CODE (scalar_dest) != ARRAY_REF
6236 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6237 && TREE_CODE (scalar_dest) != INDIRECT_REF
6238 && TREE_CODE (scalar_dest) != COMPONENT_REF
6239 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6240 && TREE_CODE (scalar_dest) != REALPART_EXPR
6241 && TREE_CODE (scalar_dest) != MEM_REF)
6242 return false;
6244 else
6246 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
6247 if (!call || !gimple_call_internal_p (call))
6248 return false;
6250 internal_fn ifn = gimple_call_internal_fn (call);
6251 if (!internal_store_fn_p (ifn))
6252 return false;
6254 if (slp_node != NULL)
6256 if (dump_enabled_p ())
6257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6258 "SLP of masked stores not supported.\n");
6259 return false;
6262 int mask_index = internal_fn_mask_index (ifn);
6263 if (mask_index >= 0)
6265 mask = gimple_call_arg (call, mask_index);
6266 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
6267 &mask_vectype))
6268 return false;
6272 op = vect_get_store_rhs (stmt_info);
6274 /* Cannot have hybrid store SLP -- that would mean storing to the
6275 same location twice. */
6276 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6278 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6279 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6281 if (loop_vinfo)
6283 loop = LOOP_VINFO_LOOP (loop_vinfo);
6284 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6286 else
6287 vf = 1;
6289 /* Multiple types in SLP are handled by creating the appropriate number of
6290 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6291 case of SLP. */
6292 if (slp)
6293 ncopies = 1;
6294 else
6295 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6297 gcc_assert (ncopies >= 1);
6299 /* FORNOW. This restriction should be relaxed. */
6300 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
6302 if (dump_enabled_p ())
6303 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6304 "multiple types in nested loop.\n");
6305 return false;
6308 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
6309 return false;
6311 elem_type = TREE_TYPE (vectype);
6312 vec_mode = TYPE_MODE (vectype);
6314 if (!STMT_VINFO_DATA_REF (stmt_info))
6315 return false;
6317 vect_memory_access_type memory_access_type;
6318 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
6319 &memory_access_type, &gs_info))
6320 return false;
6322 if (mask)
6324 if (memory_access_type == VMAT_CONTIGUOUS)
6326 if (!VECTOR_MODE_P (vec_mode)
6327 || !can_vec_mask_load_store_p (vec_mode,
6328 TYPE_MODE (mask_vectype), false))
6329 return false;
6331 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6332 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6334 if (dump_enabled_p ())
6335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6336 "unsupported access type for masked store.\n");
6337 return false;
6340 else
6342 /* FORNOW.  In some cases we can vectorize even if the data type is not
6343 supported (e.g. array initialization with 0).  */
6344 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6345 return false;
6348 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
6349 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6350 && memory_access_type != VMAT_GATHER_SCATTER
6351 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6352 if (grouped_store)
6354 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6355 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6356 group_size = DR_GROUP_SIZE (first_stmt_info);
6358 else
6360 first_stmt_info = stmt_info;
6361 first_dr_info = dr_info;
6362 group_size = vec_num = 1;
6365 if (!vec_stmt) /* transformation not required. */
6367 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6369 if (loop_vinfo
6370 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6371 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6372 memory_access_type, &gs_info);
6374 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6375 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6376 vls_type, slp_node, cost_vec);
6377 return true;
6379 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6381 /* Transform. */
6383 ensure_base_align (dr_info);
6385 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6387 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6388 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6389 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6390 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6391 edge pe = loop_preheader_edge (loop);
6392 gimple_seq seq;
6393 basic_block new_bb;
6394 enum { NARROW, NONE, WIDEN } modifier;
6395 poly_uint64 scatter_off_nunits
6396 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6398 if (known_eq (nunits, scatter_off_nunits))
6399 modifier = NONE;
6400 else if (known_eq (nunits * 2, scatter_off_nunits))
6402 modifier = WIDEN;
6404 /* Currently gathers and scatters are only supported for
6405 fixed-length vectors. */
6406 unsigned int count = scatter_off_nunits.to_constant ();
6407 vec_perm_builder sel (count, count, 1);
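/* Build a permutation selecting the high half of the offset vector,
   e.g. { 2, 3, 2, 3 } for COUNT == 4; odd-numbered copies use it to
   feed the second scatter with the remaining offsets.  */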
6408 for (i = 0; i < (unsigned int) count; ++i)
6409 sel.quick_push (i | (count / 2));
6411 vec_perm_indices indices (sel, 1, count);
6412 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6413 indices);
6414 gcc_assert (perm_mask != NULL_TREE);
6416 else if (known_eq (nunits, scatter_off_nunits * 2))
6418 modifier = NARROW;
6420 /* Currently gathers and scatters are only supported for
6421 fixed-length vectors. */
6422 unsigned int count = nunits.to_constant ();
6423 vec_perm_builder sel (count, count, 1);
6424 for (i = 0; i < (unsigned int) count; ++i)
6425 sel.quick_push (i | (count / 2));
6427 vec_perm_indices indices (sel, 2, count);
6428 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6429 gcc_assert (perm_mask != NULL_TREE);
6430 ncopies *= 2;
6432 else
6433 gcc_unreachable ();
6435 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6436 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6437 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6438 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6439 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6440 scaletype = TREE_VALUE (arglist);
6442 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6443 && TREE_CODE (rettype) == VOID_TYPE);
6445 ptr = fold_convert (ptrtype, gs_info.base);
6446 if (!is_gimple_min_invariant (ptr))
6448 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6449 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6450 gcc_assert (!new_bb);
6453 /* Currently we support only unconditional scatter stores,
6454 so mask should be all ones. */
6455 mask = build_int_cst (masktype, -1);
6456 mask = vect_init_vector (stmt_info, mask, masktype, NULL);
6458 scale = build_int_cst (scaletype, gs_info.scale);
6460 prev_stmt_info = NULL;
6461 for (j = 0; j < ncopies; ++j)
6463 if (j == 0)
6465 src = vec_oprnd1
6466 = vect_get_vec_def_for_operand (op, stmt_info);
6467 op = vec_oprnd0
6468 = vect_get_vec_def_for_operand (gs_info.offset, stmt_info);
6470 else if (modifier != NONE && (j & 1))
6472 if (modifier == WIDEN)
6474 src = vec_oprnd1
6475 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
6476 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6477 stmt_info, gsi);
6479 else if (modifier == NARROW)
6481 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6482 stmt_info, gsi);
6483 op = vec_oprnd0
6484 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
6486 else
6487 gcc_unreachable ();
6489 else
6491 src = vec_oprnd1
6492 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
6493 op = vec_oprnd0
6494 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
6497 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6499 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6500 TYPE_VECTOR_SUBPARTS (srctype)));
6501 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6502 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6503 gassign *new_stmt
6504 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6505 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6506 src = var;
6509 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6511 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6512 TYPE_VECTOR_SUBPARTS (idxtype)));
6513 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6514 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6515 gassign *new_stmt
6516 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6517 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6518 op = var;
6521 gcall *new_stmt
6522 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6523 stmt_vec_info new_stmt_info
6524 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6526 if (prev_stmt_info == NULL)
6527 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6528 else
6529 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6530 prev_stmt_info = new_stmt_info;
6532 return true;
6535 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6536 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
6538 if (grouped_store)
6540 /* FORNOW */
6541 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
6543 /* We vectorize all the stmts of the interleaving group when we
6544 reach the last stmt in the group. */
6545 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6546 < DR_GROUP_SIZE (first_stmt_info)
6547 && !slp)
6549 *vec_stmt = NULL;
6550 return true;
6553 if (slp)
6555 grouped_store = false;
6556 /* VEC_NUM is the number of vect stmts to be created for this
6557 group. */
6558 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6559 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6560 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6561 == first_stmt_info);
6562 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6563 op = vect_get_store_rhs (first_stmt_info);
6565 else
6566 /* VEC_NUM is the number of vect stmts to be created for this
6567 group. */
6568 vec_num = group_size;
6570 ref_type = get_group_alias_ptr_type (first_stmt_info);
6572 else
6573 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
6575 if (dump_enabled_p ())
6576 dump_printf_loc (MSG_NOTE, vect_location,
6577 "transform store. ncopies = %d\n", ncopies);
6579 if (memory_access_type == VMAT_ELEMENTWISE
6580 || memory_access_type == VMAT_STRIDED_SLP)
6582 gimple_stmt_iterator incr_gsi;
6583 bool insert_after;
6584 gimple *incr;
6585 tree offvar;
6586 tree ivstep;
6587 tree running_off;
6588 tree stride_base, stride_step, alias_off;
6589 tree vec_oprnd;
6590 unsigned int g;
6591 /* Checked by get_load_store_type. */
6592 unsigned int const_nunits = nunits.to_constant ();
6594 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6595 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
6597 stride_base
6598 = fold_build_pointer_plus
6599 (DR_BASE_ADDRESS (first_dr_info->dr),
6600 size_binop (PLUS_EXPR,
6601 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
6602 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
6603 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
6605 /* For a store with loop-invariant (but other than power-of-2)
6606 stride (i.e. not a grouped access) like so:
6608 for (i = 0; i < n; i += stride)
6609 array[i] = ...;
6611 we generate a new induction variable and new stores from
6612 the components of the (vectorized) rhs:
6614 for (j = 0; ; j += VF*stride)
6615 vectemp = ...;
6616 tmp1 = vectemp[0];
6617 array[j] = tmp1;
6618 tmp2 = vectemp[1];
6619 array[j + stride] = tmp2;
6623 unsigned nstores = const_nunits;
6624 unsigned lnel = 1;
6625 tree ltype = elem_type;
6626 tree lvectype = vectype;
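/* Each vector of type LVECTYPE is decomposed into NSTORES pieces of
   type LTYPE, each covering LNEL elements of the scalar group; the
   SLP cases below choose larger pieces when the group layout allows.  */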
6627 if (slp)
6629 if (group_size < const_nunits
6630 && const_nunits % group_size == 0)
6632 nstores = const_nunits / group_size;
6633 lnel = group_size;
6634 ltype = build_vector_type (elem_type, group_size);
6635 lvectype = vectype;
6637 /* First check if vec_extract optab doesn't support extraction
6638 of vector elts directly. */
6639 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6640 machine_mode vmode;
6641 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6642 || !VECTOR_MODE_P (vmode)
6643 || !targetm.vector_mode_supported_p (vmode)
6644 || (convert_optab_handler (vec_extract_optab,
6645 TYPE_MODE (vectype), vmode)
6646 == CODE_FOR_nothing))
6648 /* Try to avoid emitting an extract of vector elements
6649 by performing the extracts using an integer type of the
6650 same size, extracting from a vector of those and then
6651 re-interpreting it as the original vector type if
6652 supported. */
6653 unsigned lsize
6654 = group_size * GET_MODE_BITSIZE (elmode);
6655 elmode = int_mode_for_size (lsize, 0).require ();
6656 unsigned int lnunits = const_nunits / group_size;
6657 /* If we can't construct such a vector fall back to
6658 element extracts from the original vector type and
6659 element size stores. */
6660 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6661 && VECTOR_MODE_P (vmode)
6662 && targetm.vector_mode_supported_p (vmode)
6663 && (convert_optab_handler (vec_extract_optab,
6664 vmode, elmode)
6665 != CODE_FOR_nothing))
6667 nstores = lnunits;
6668 lnel = group_size;
6669 ltype = build_nonstandard_integer_type (lsize, 1);
6670 lvectype = build_vector_type (ltype, nstores);
6672 /* Else fall back to vector extraction anyway.
6673 Fewer stores are more important than avoiding spilling
6674 of the vector we extract from. Compared to the
6675 construction case in vectorizable_load no store-forwarding
6676 issue exists here for reasonable archs. */
6679 else if (group_size >= const_nunits
6680 && group_size % const_nunits == 0)
6682 nstores = 1;
6683 lnel = const_nunits;
6684 ltype = vectype;
6685 lvectype = vectype;
6687 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6688 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6691 ivstep = stride_step;
6692 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6693 build_int_cst (TREE_TYPE (ivstep), vf));
6695 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6697 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6698 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6699 create_iv (stride_base, ivstep, NULL,
6700 loop, &incr_gsi, insert_after,
6701 &offvar, NULL);
6702 incr = gsi_stmt (incr_gsi);
6703 loop_vinfo->add_stmt (incr);
6705 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6707 prev_stmt_info = NULL;
6708 alias_off = build_int_cst (ref_type, 0);
6709 stmt_vec_info next_stmt_info = first_stmt_info;
6710 for (g = 0; g < group_size; g++)
6712 running_off = offvar;
6713 if (g)
6715 tree size = TYPE_SIZE_UNIT (ltype);
6716 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6717 size);
6718 tree newoff = copy_ssa_name (running_off, NULL);
6719 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6720 running_off, pos);
6721 vect_finish_stmt_generation (stmt_info, incr, gsi);
6722 running_off = newoff;
6724 unsigned int group_el = 0;
6725 unsigned HOST_WIDE_INT
6726 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6727 for (j = 0; j < ncopies; j++)
6729 /* We've set op and dt above, from vect_get_store_rhs,
6730 and first_stmt_info == stmt_info. */
6731 if (j == 0)
6733 if (slp)
6735 vect_get_vec_defs (op, NULL_TREE, stmt_info,
6736 &vec_oprnds, NULL, slp_node);
6737 vec_oprnd = vec_oprnds[0];
6739 else
6741 op = vect_get_store_rhs (next_stmt_info);
6742 vec_oprnd = vect_get_vec_def_for_operand
6743 (op, next_stmt_info);
6746 else
6748 if (slp)
6749 vec_oprnd = vec_oprnds[j];
6750 else
6751 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
6752 vec_oprnd);
6754 /* Pun the vector to extract from if necessary: if an integer LVECTYPE was chosen above, view-convert so the extracts below read LTYPE chunks.  */
6755 if (lvectype != vectype)
6757 tree tem = make_ssa_name (lvectype);
6758 gimple *pun
6759 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6760 lvectype, vec_oprnd));
6761 vect_finish_stmt_generation (stmt_info, pun, gsi);
6762 vec_oprnd = tem;
6764 for (i = 0; i < nstores; i++)
6766 tree newref, newoff;
6767 gimple *incr, *assign;
6768 tree size = TYPE_SIZE (ltype);
6769 /* Extract the i'th component. */
6770 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6771 bitsize_int (i), size);
6772 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6773 size, pos);
6775 elem = force_gimple_operand_gsi (gsi, elem, true,
6776 NULL_TREE, true,
6777 GSI_SAME_STMT);
6779 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6780 group_el * elsz);
6781 newref = build2 (MEM_REF, ltype,
6782 running_off, this_off);
6783 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
6785 /* And store it to *running_off. */
6786 assign = gimple_build_assign (newref, elem);
6787 stmt_vec_info assign_info
6788 = vect_finish_stmt_generation (stmt_info, assign, gsi);
6790 group_el += lnel;
6791 if (! slp
6792 || group_el == group_size)
6794 newoff = copy_ssa_name (running_off, NULL);
6795 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6796 running_off, stride_step);
6797 vect_finish_stmt_generation (stmt_info, incr, gsi);
6799 running_off = newoff;
6800 group_el = 0;
6802 if (g == group_size - 1
6803 && !slp)
6805 if (j == 0 && i == 0)
6806 STMT_VINFO_VEC_STMT (stmt_info)
6807 = *vec_stmt = assign_info;
6808 else
6809 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6810 prev_stmt_info = assign_info;
6814 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6815 if (slp)
6816 break;
6819 vec_oprnds.release ();
6820 return true;
6823 auto_vec<tree> dr_chain (group_size);
6824 oprnds.create (group_size);
6826 alignment_support_scheme
6827 = vect_supportable_dr_alignment (first_dr_info, false);
6828 gcc_assert (alignment_support_scheme);
6829 vec_loop_masks *loop_masks
6830 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6831 ? &LOOP_VINFO_MASKS (loop_vinfo)
6832 : NULL);
6833 /* Targets with store-lane instructions must not require explicit
6834 realignment. vect_supportable_dr_alignment always returns either
6835 dr_aligned or dr_unaligned_supported for masked operations. */
6836 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6837 && !mask
6838 && !loop_masks)
6839 || alignment_support_scheme == dr_aligned
6840 || alignment_support_scheme == dr_unaligned_supported);
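/* For a reverse (negative step) access bias the data pointer by
   -(nunits - 1) elements so that the vector reference covers the
   addressed range.  */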
6842 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6843 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6844 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6846 tree bump;
6847 tree vec_offset = NULL_TREE;
6848 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6850 aggr_type = NULL_TREE;
6851 bump = NULL_TREE;
6853 else if (memory_access_type == VMAT_GATHER_SCATTER)
6855 aggr_type = elem_type;
6856 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
6857 &bump, &vec_offset);
6859 else
6861 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6862 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6863 else
6864 aggr_type = vectype;
6865 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
6866 memory_access_type);
6869 if (mask)
6870 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6872 /* In case the vectorization factor (VF) is bigger than the number
6873 of elements that we can fit in a vectype (nunits), we have to generate
6874 more than one vector stmt, i.e. we need to "unroll" the
6875 vector stmt by a factor VF/nunits.  For more details see the
6876 documentation of vect_get_vec_def_for_stmt_copy.  */
6878 /* In case of interleaving (non-unit grouped access):
6880 S1: &base + 2 = x2
6881 S2: &base = x0
6882 S3: &base + 1 = x1
6883 S4: &base + 3 = x3
6885 We create vectorized stores starting from the base address (the access of
6886 the first stmt in the chain, S2 in the above example) when the last store
6887 stmt of the chain (S4) is reached:
6889 VS1: &base = vx2
6890 VS2: &base + vec_size*1 = vx0
6891 VS3: &base + vec_size*2 = vx1
6892 VS4: &base + vec_size*3 = vx3
6894 Then permutation statements are generated:
6896 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6897 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6900 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6901 (the order of the data-refs in the output of vect_permute_store_chain
6902 corresponds to the order of scalar stmts in the interleaving chain - see
6903 the documentation of vect_permute_store_chain()).
6905 In case of both multiple types and interleaving, the above vector stores and
6906 permutation stmts are created for every copy. The result vector stmts are
6907 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6908 STMT_VINFO_RELATED_STMT for the next copies.
6911 prev_stmt_info = NULL;
6912 tree vec_mask = NULL_TREE;
6913 for (j = 0; j < ncopies; j++)
6915 stmt_vec_info new_stmt_info;
6916 if (j == 0)
6918 if (slp)
6920 /* Get vectorized arguments for SLP_NODE. */
6921 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
6922 NULL, slp_node);
6924 vec_oprnd = vec_oprnds[0];
6926 else
6928 /* For interleaved stores we collect vectorized defs for all the
6929 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6930 used as an input to vect_permute_store_chain(), and OPRNDS as
6931 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6933 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6934 OPRNDS are of size 1. */
6935 stmt_vec_info next_stmt_info = first_stmt_info;
6936 for (i = 0; i < group_size; i++)
6938 /* Since gaps are not supported for interleaved stores,
6939 DR_GROUP_SIZE is the exact number of stmts in the chain.
6940 Therefore, NEXT_STMT_INFO can't be NULL.  If there is no
6941 interleaving, DR_GROUP_SIZE is 1, and only one iteration
6942 of the loop will be executed.  */
6943 op = vect_get_store_rhs (next_stmt_info);
6944 vec_oprnd = vect_get_vec_def_for_operand
6945 (op, next_stmt_info);
6946 dr_chain.quick_push (vec_oprnd);
6947 oprnds.quick_push (vec_oprnd);
6948 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6950 if (mask)
6951 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
6952 mask_vectype);
6955 /* We should have caught mismatched types earlier.  */
6956 gcc_assert (useless_type_conversion_p (vectype,
6957 TREE_TYPE (vec_oprnd)));
6958 bool simd_lane_access_p
6959 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6960 if (simd_lane_access_p
6961 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
6962 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
6963 && integer_zerop (DR_OFFSET (first_dr_info->dr))
6964 && integer_zerop (DR_INIT (first_dr_info->dr))
6965 && alias_sets_conflict_p (get_alias_set (aggr_type),
6966 get_alias_set (TREE_TYPE (ref_type))))
6968 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
6969 dataref_offset = build_int_cst (ref_type, 0);
6971 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6972 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
6973 &dataref_ptr, &vec_offset);
6974 else
6975 dataref_ptr
6976 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
6977 simd_lane_access_p ? loop : NULL,
6978 offset, &dummy, gsi, &ptr_incr,
6979 simd_lane_access_p, NULL_TREE, bump);
6981 else
6983 /* For interleaved stores we created vectorized defs for all the
6984 defs stored in OPRNDS in the previous iteration (previous copy).
6985 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6986 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6987 next copy.
6988 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6989 OPRNDS are of size 1. */
6990 for (i = 0; i < group_size; i++)
6992 op = oprnds[i];
6993 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
6994 dr_chain[i] = vec_oprnd;
6995 oprnds[i] = vec_oprnd;
6997 if (mask)
6998 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
6999 if (dataref_offset)
7000 dataref_offset
7001 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7002 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7003 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
7004 else
7005 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7006 stmt_info, bump);
7009 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7011 tree vec_array;
7013 /* Get an array into which we can store the individual vectors. */
7014 vec_array = create_vector_array (vectype, vec_num);
7016 /* Invalidate the current contents of VEC_ARRAY. This should
7017 become an RTL clobber too, which prevents the vector registers
7018 from being upward-exposed. */
7019 vect_clobber_variable (stmt_info, gsi, vec_array);
7021 /* Store the individual vectors into the array. */
7022 for (i = 0; i < vec_num; i++)
7024 vec_oprnd = dr_chain[i];
7025 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
7028 tree final_mask = NULL;
7029 if (loop_masks)
7030 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7031 vectype, j);
7032 if (vec_mask)
7033 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7034 vec_mask, gsi);
7036 gcall *call;
7037 if (final_mask)
7039 /* Emit:
7040 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7041 VEC_ARRAY). */
7042 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7043 tree alias_ptr = build_int_cst (ref_type, align);
7044 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7045 dataref_ptr, alias_ptr,
7046 final_mask, vec_array);
7048 else
7050 /* Emit:
7051 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7052 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7053 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7054 vec_array);
7055 gimple_call_set_lhs (call, data_ref);
7057 gimple_call_set_nothrow (call, true);
7058 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
7060 /* Record that VEC_ARRAY is now dead. */
7061 vect_clobber_variable (stmt_info, gsi, vec_array);
7063 else
7065 new_stmt_info = NULL;
7066 if (grouped_store)
7068 if (j == 0)
7069 result_chain.create (group_size);
7070 /* Permute. */
7071 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
7072 &result_chain);
7075 stmt_vec_info next_stmt_info = first_stmt_info;
7076 for (i = 0; i < vec_num; i++)
7078 unsigned align, misalign;
7080 tree final_mask = NULL_TREE;
7081 if (loop_masks)
7082 final_mask = vect_get_loop_mask (gsi, loop_masks,
7083 vec_num * ncopies,
7084 vectype, vec_num * j + i);
7085 if (vec_mask)
7086 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7087 vec_mask, gsi);
7089 if (memory_access_type == VMAT_GATHER_SCATTER)
7091 tree scale = size_int (gs_info.scale);
7092 gcall *call;
7093 if (loop_masks)
7094 call = gimple_build_call_internal
7095 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7096 scale, vec_oprnd, final_mask);
7097 else
7098 call = gimple_build_call_internal
7099 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7100 scale, vec_oprnd);
7101 gimple_call_set_nothrow (call, true);
7102 new_stmt_info
7103 = vect_finish_stmt_generation (stmt_info, call, gsi);
7104 break;
7107 if (i > 0)
7108 /* Bump the vector pointer. */
7109 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7110 stmt_info, bump);
7112 if (slp)
7113 vec_oprnd = vec_oprnds[i];
7114 else if (grouped_store)
7115 /* For grouped stores vectorized defs are interleaved in
7116 vect_permute_store_chain(). */
7117 vec_oprnd = result_chain[i];
7119 align = DR_TARGET_ALIGNMENT (first_dr_info);
7120 if (aligned_access_p (first_dr_info))
7121 misalign = 0;
7122 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7124 align = dr_alignment (vect_dr_behavior (first_dr_info));
7125 misalign = 0;
7127 else
7128 misalign = DR_MISALIGNMENT (first_dr_info);
7129 if (dataref_offset == NULL_TREE
7130 && TREE_CODE (dataref_ptr) == SSA_NAME)
7131 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7132 misalign);
7134 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7136 tree perm_mask = perm_mask_for_reverse (vectype);
7137 tree perm_dest = vect_create_destination_var
7138 (vect_get_store_rhs (stmt_info), vectype);
7139 tree new_temp = make_ssa_name (perm_dest);
7141 /* Generate the permute statement. */
7142 gimple *perm_stmt
7143 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7144 vec_oprnd, perm_mask);
7145 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7147 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7148 vec_oprnd = new_temp;
7151 /* Arguments are ready. Create the new vector stmt. */
7152 if (final_mask)
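/* The alias pointer argument of IFN_MASK_STORE encodes the alignment
   that can be relied on: the lowest set bit of (misalign | align),
   i.e. the largest power of two known to divide the address.  */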
7154 align = least_bit_hwi (misalign | align);
7155 tree ptr = build_int_cst (ref_type, align);
7156 gcall *call
7157 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7158 dataref_ptr, ptr,
7159 final_mask, vec_oprnd);
7160 gimple_call_set_nothrow (call, true);
7161 new_stmt_info
7162 = vect_finish_stmt_generation (stmt_info, call, gsi);
7164 else
7166 data_ref = fold_build2 (MEM_REF, vectype,
7167 dataref_ptr,
7168 dataref_offset
7169 ? dataref_offset
7170 : build_int_cst (ref_type, 0));
7171 if (aligned_access_p (first_dr_info))
7173 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7174 TREE_TYPE (data_ref)
7175 = build_aligned_type (TREE_TYPE (data_ref),
7176 align * BITS_PER_UNIT);
7177 else
7178 TREE_TYPE (data_ref)
7179 = build_aligned_type (TREE_TYPE (data_ref),
7180 TYPE_ALIGN (elem_type));
7181 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7182 gassign *new_stmt
7183 = gimple_build_assign (data_ref, vec_oprnd);
7184 new_stmt_info
7185 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7188 if (slp)
7189 continue;
7191 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7192 if (!next_stmt_info)
7193 break;
7196 if (!slp)
7198 if (j == 0)
7199 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7200 else
7201 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7202 prev_stmt_info = new_stmt_info;
7206 oprnds.release ();
7207 result_chain.release ();
7208 vec_oprnds.release ();
7210 return true;
7213 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7214 VECTOR_CST mask. No checks are made that the target platform supports the
7215 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7216 vect_gen_perm_mask_checked. */
7218 tree
7219 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7221 tree mask_type;
7223 poly_uint64 nunits = sel.length ();
7224 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7226 mask_type = build_vector_type (ssizetype, nunits);
7227 return vec_perm_indices_to_tree (mask_type, sel);
7230 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7231 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7233 tree
7234 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7236 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7237 return vect_gen_perm_mask_any (vectype, sel);
7240 /* Given vector variables X and Y that were generated for the scalar
7241 STMT_INFO, generate instructions to permute the vector elements of X and Y
7242 using permutation mask MASK_VEC, insert them at *GSI and return the
7243 permuted vector variable. */
7245 static tree
7246 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
7247 gimple_stmt_iterator *gsi)
7249 tree vectype = TREE_TYPE (x);
7250 tree perm_dest, data_ref;
7251 gimple *perm_stmt;
7253 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
7254 if (TREE_CODE (scalar_dest) == SSA_NAME)
7255 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7256 else
7257 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7258 data_ref = make_ssa_name (perm_dest);
7260 /* Generate the permute statement. */
7261 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7262 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7264 return data_ref;
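 /* Illustrative example (made-up names): interleaving the low halves of
 two four-element vectors X and Y would use MASK_VEC = { 0, 4, 1, 5 }
 and emit
 vect_perm_42 = VEC_PERM_EXPR <X, Y, { 0, 4, 1, 5 }>;  */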
7267 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7268 inserting them on the loop's preheader edge. Returns true if we
7269 were successful in doing so (and thus STMT_INFO can be moved then),
7270 otherwise returns false. */
7272 static bool
7273 hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
7275 ssa_op_iter i;
7276 tree op;
7277 bool any = false;
7279 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7281 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7282 if (!gimple_nop_p (def_stmt)
7283 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7285 /* Make sure we don't need to recurse. While we could do
7286 so in simple cases, when there are more complex use webs
7287 we don't have an easy way to preserve stmt order to fulfil
7288 dependencies within them. */
7289 tree op2;
7290 ssa_op_iter i2;
7291 if (gimple_code (def_stmt) == GIMPLE_PHI)
7292 return false;
7293 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7295 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7296 if (!gimple_nop_p (def_stmt2)
7297 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7298 return false;
7300 any = true;
7304 if (!any)
7305 return true;
7307 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7309 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7310 if (!gimple_nop_p (def_stmt)
7311 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7313 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7314 gsi_remove (&gsi, false);
7315 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7319 return true;
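 /* Illustrative example (made-up names): for an invariant load
 _8 = *p_7, where p_7 = &a[n_3] is defined inside the loop but only
 from loop-invariant operands, the definition of p_7 is moved to the
 preheader edge here so that the load itself can later be emitted
 there as well (see the VMAT_INVARIANT handling in vectorizable_load
 below).  */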
7322 /* vectorizable_load.
7324 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
7325 that can be vectorized.
7326 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7327 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7328 Return true if STMT_INFO is vectorizable in this way. */
7330 static bool
7331 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7332 stmt_vec_info *vec_stmt, slp_tree slp_node,
7333 slp_instance slp_node_instance,
7334 stmt_vector_for_cost *cost_vec)
7336 tree scalar_dest;
7337 tree vec_dest = NULL;
7338 tree data_ref = NULL;
7339 stmt_vec_info prev_stmt_info;
7340 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7341 struct loop *loop = NULL;
7342 struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
7343 bool nested_in_vect_loop = false;
7344 tree elem_type;
7345 tree new_temp;
7346 machine_mode mode;
7347 tree dummy;
7348 enum dr_alignment_support alignment_support_scheme;
7349 tree dataref_ptr = NULL_TREE;
7350 tree dataref_offset = NULL_TREE;
7351 gimple *ptr_incr = NULL;
7352 int ncopies;
7353 int i, j;
7354 unsigned int group_size;
7355 poly_uint64 group_gap_adj;
7356 tree msq = NULL_TREE, lsq;
7357 tree offset = NULL_TREE;
7358 tree byte_offset = NULL_TREE;
7359 tree realignment_token = NULL_TREE;
7360 gphi *phi = NULL;
7361 vec<tree> dr_chain = vNULL;
7362 bool grouped_load = false;
7363 stmt_vec_info first_stmt_info;
7364 stmt_vec_info first_stmt_info_for_drptr = NULL;
7365 bool compute_in_loop = false;
7366 struct loop *at_loop;
7367 int vec_num;
7368 bool slp = (slp_node != NULL);
7369 bool slp_perm = false;
7370 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7371 poly_uint64 vf;
7372 tree aggr_type;
7373 gather_scatter_info gs_info;
7374 vec_info *vinfo = stmt_info->vinfo;
7375 tree ref_type;
7376 enum vect_def_type mask_dt = vect_unknown_def_type;
7378 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7379 return false;
7381 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7382 && ! vec_stmt)
7383 return false;
7385 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7386 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7388 scalar_dest = gimple_assign_lhs (assign);
7389 if (TREE_CODE (scalar_dest) != SSA_NAME)
7390 return false;
7392 tree_code code = gimple_assign_rhs_code (assign);
7393 if (code != ARRAY_REF
7394 && code != BIT_FIELD_REF
7395 && code != INDIRECT_REF
7396 && code != COMPONENT_REF
7397 && code != IMAGPART_EXPR
7398 && code != REALPART_EXPR
7399 && code != MEM_REF
7400 && TREE_CODE_CLASS (code) != tcc_declaration)
7401 return false;
7403 else
7405 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7406 if (!call || !gimple_call_internal_p (call))
7407 return false;
7409 internal_fn ifn = gimple_call_internal_fn (call);
7410 if (!internal_load_fn_p (ifn))
7411 return false;
7413 scalar_dest = gimple_call_lhs (call);
7414 if (!scalar_dest)
7415 return false;
7417 if (slp_node != NULL)
7419 if (dump_enabled_p ())
7420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7421 "SLP of masked loads not supported.\n");
7422 return false;
7425 int mask_index = internal_fn_mask_index (ifn);
7426 if (mask_index >= 0)
7428 mask = gimple_call_arg (call, mask_index);
7429 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7430 &mask_vectype))
7431 return false;
7435 if (!STMT_VINFO_DATA_REF (stmt_info))
7436 return false;
7438 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7439 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7441 if (loop_vinfo)
7443 loop = LOOP_VINFO_LOOP (loop_vinfo);
7444 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
7445 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7447 else
7448 vf = 1;
7450 /* Multiple types in SLP are handled by creating the appropriate number of
7451 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7452 case of SLP. */
7453 if (slp)
7454 ncopies = 1;
7455 else
7456 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7458 gcc_assert (ncopies >= 1);
7460 /* FORNOW. This restriction should be relaxed. */
7461 if (nested_in_vect_loop && ncopies > 1)
7463 if (dump_enabled_p ())
7464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7465 "multiple types in nested loop.\n");
7466 return false;
7469 /* Invalidate assumptions made by dependence analysis when vectorization
7470 on the unrolled body effectively re-orders stmts. */
7471 if (ncopies > 1
7472 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7473 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7474 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7476 if (dump_enabled_p ())
7477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7478 "cannot perform implicit CSE when unrolling "
7479 "with negative dependence distance\n");
7480 return false;
7483 elem_type = TREE_TYPE (vectype);
7484 mode = TYPE_MODE (vectype);
7486 /* FORNOW. In some cases can vectorize even if data-type not supported
7487 (e.g. - data copies). */
7488 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7490 if (dump_enabled_p ())
7491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7492 "Aligned load, but unsupported type.\n");
7493 return false;
7496 /* Check if the load is a part of an interleaving chain. */
7497 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7499 grouped_load = true;
7500 /* FORNOW */
7501 gcc_assert (!nested_in_vect_loop);
7502 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7504 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7505 group_size = DR_GROUP_SIZE (first_stmt_info);
7507 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7508 slp_perm = true;
7510 /* Invalidate assumptions made by dependence analysis when vectorization
7511 on the unrolled body effectively re-orders stmts. */
7512 if (!PURE_SLP_STMT (stmt_info)
7513 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7514 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7515 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7517 if (dump_enabled_p ())
7518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7519 "cannot perform implicit CSE when performing "
7520 "group loads with negative dependence distance\n");
7521 return false;
7524 /* Similarly, when the stmt is a load that is both part of an SLP
7525 instance and a loop-vectorized stmt via the same-dr mechanism,
7526 we have to give up. */
7527 if (DR_GROUP_SAME_DR_STMT (stmt_info)
7528 && (STMT_SLP_TYPE (stmt_info)
7529 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info))))
7531 if (dump_enabled_p ())
7532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7533 "conflicting SLP types for CSEd load\n");
7534 return false;
7537 else
7538 group_size = 1;
7540 vect_memory_access_type memory_access_type;
7541 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
7542 &memory_access_type, &gs_info))
7543 return false;
7545 if (mask)
7547 if (memory_access_type == VMAT_CONTIGUOUS)
7549 machine_mode vec_mode = TYPE_MODE (vectype);
7550 if (!VECTOR_MODE_P (vec_mode)
7551 || !can_vec_mask_load_store_p (vec_mode,
7552 TYPE_MODE (mask_vectype), true))
7553 return false;
7555 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7557 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7558 tree masktype
7559 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7560 if (TREE_CODE (masktype) == INTEGER_TYPE)
7562 if (dump_enabled_p ())
7563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7564 "masked gather with integer mask not"
7565 " supported.");
7566 return false;
7569 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7570 && memory_access_type != VMAT_GATHER_SCATTER)
7572 if (dump_enabled_p ())
7573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7574 "unsupported access type for masked load.\n");
7575 return false;
7579 if (!vec_stmt) /* transformation not required. */
7581 if (!slp)
7582 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7584 if (loop_vinfo
7585 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7586 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7587 memory_access_type, &gs_info);
7589 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7590 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7591 slp_node_instance, slp_node, cost_vec);
7592 return true;
7595 if (!slp)
7596 gcc_assert (memory_access_type
7597 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7599 if (dump_enabled_p ())
7600 dump_printf_loc (MSG_NOTE, vect_location,
7601 "transform load. ncopies = %d\n", ncopies);
7603 /* Transform. */
7605 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7606 ensure_base_align (dr_info);
7608 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7610 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
7611 return true;
7614 if (memory_access_type == VMAT_INVARIANT)
7616 gcc_assert (!grouped_load && !mask && !bb_vinfo);
7617 /* If we have versioned for aliasing or the loop doesn't
7618 have any data dependencies that would preclude this,
7619 then we are sure this is a loop invariant load and
7620 thus we can insert it on the preheader edge. */
7621 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7622 && !nested_in_vect_loop
7623 && hoist_defs_of_uses (stmt_info, loop));
7624 if (hoist_p)
7626 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
7627 if (dump_enabled_p ())
7628 dump_printf_loc (MSG_NOTE, vect_location,
7629 "hoisting out of the vectorized loop: %G", stmt);
7630 scalar_dest = copy_ssa_name (scalar_dest);
7631 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
7632 gsi_insert_on_edge_immediate
7633 (loop_preheader_edge (loop),
7634 gimple_build_assign (scalar_dest, rhs));
7636 /* These copies are all equivalent, but currently the representation
7637 requires a separate STMT_VINFO_VEC_STMT for each one. */
7638 prev_stmt_info = NULL;
7639 gimple_stmt_iterator gsi2 = *gsi;
7640 gsi_next (&gsi2);
7641 for (j = 0; j < ncopies; j++)
7643 stmt_vec_info new_stmt_info;
7644 if (hoist_p)
7646 new_temp = vect_init_vector (stmt_info, scalar_dest,
7647 vectype, NULL);
7648 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
7649 new_stmt_info = vinfo->add_stmt (new_stmt);
7651 else
7653 new_temp = vect_init_vector (stmt_info, scalar_dest,
7654 vectype, &gsi2);
7655 new_stmt_info = vinfo->lookup_def (new_temp);
7657 if (slp)
7658 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7659 else if (j == 0)
7660 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7661 else
7662 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7663 prev_stmt_info = new_stmt_info;
7665 return true;
7668 if (memory_access_type == VMAT_ELEMENTWISE
7669 || memory_access_type == VMAT_STRIDED_SLP)
7671 gimple_stmt_iterator incr_gsi;
7672 bool insert_after;
7673 gimple *incr;
7674 tree offvar;
7675 tree ivstep;
7676 tree running_off;
7677 vec<constructor_elt, va_gc> *v = NULL;
7678 tree stride_base, stride_step, alias_off;
7679 /* Checked by get_load_store_type. */
7680 unsigned int const_nunits = nunits.to_constant ();
7681 unsigned HOST_WIDE_INT cst_offset = 0;
7683 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7684 gcc_assert (!nested_in_vect_loop);
7686 if (grouped_load)
7688 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7689 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7691 else
7693 first_stmt_info = stmt_info;
7694 first_dr_info = dr_info;
7696 if (slp && grouped_load)
7698 group_size = DR_GROUP_SIZE (first_stmt_info);
7699 ref_type = get_group_alias_ptr_type (first_stmt_info);
7701 else
7703 if (grouped_load)
7704 cst_offset
7705 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7706 * vect_get_place_in_interleaving_chain (stmt_info,
7707 first_stmt_info));
7708 group_size = 1;
7709 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7712 stride_base
7713 = fold_build_pointer_plus
7714 (DR_BASE_ADDRESS (first_dr_info->dr),
7715 size_binop (PLUS_EXPR,
7716 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7717 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7718 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7720 /* For a load with loop-invariant (but other than power-of-2)
7721 stride (i.e. not a grouped access) like so:
7723 for (i = 0; i < n; i += stride)
7724 ... = array[i];
7726 we generate a new induction variable and new accesses to
7727 form a new vector (or vectors, depending on ncopies):
7729 for (j = 0; ; j += VF*stride)
7730 tmp1 = array[j];
7731 tmp2 = array[j + stride];
7733 vectemp = {tmp1, tmp2, ...}
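 /* For instance (illustrative), with a vectorization factor of 4 the
 induction variable created below advances by 4 * DR_STEP bytes per
 vector iteration, and each vector is assembled from NLOADS element
 (or sub-vector) loads.  */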
7736 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7737 build_int_cst (TREE_TYPE (stride_step), vf));
7739 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7741 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7742 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7743 create_iv (stride_base, ivstep, NULL,
7744 loop, &incr_gsi, insert_after,
7745 &offvar, NULL);
7746 incr = gsi_stmt (incr_gsi);
7747 loop_vinfo->add_stmt (incr);
7749 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7751 prev_stmt_info = NULL;
7752 running_off = offvar;
7753 alias_off = build_int_cst (ref_type, 0);
7754 int nloads = const_nunits;
7755 int lnel = 1;
7756 tree ltype = TREE_TYPE (vectype);
7757 tree lvectype = vectype;
7758 auto_vec<tree> dr_chain;
7759 if (memory_access_type == VMAT_STRIDED_SLP)
7761 if (group_size < const_nunits)
7763 /* First check if vec_init optab supports construction from
7764 vector elts directly. */
7765 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7766 machine_mode vmode;
7767 if (mode_for_vector (elmode, group_size).exists (&vmode)
7768 && VECTOR_MODE_P (vmode)
7769 && targetm.vector_mode_supported_p (vmode)
7770 && (convert_optab_handler (vec_init_optab,
7771 TYPE_MODE (vectype), vmode)
7772 != CODE_FOR_nothing))
7774 nloads = const_nunits / group_size;
7775 lnel = group_size;
7776 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7778 else
7780 /* Otherwise avoid emitting a constructor of vector elements
7781 by performing the loads using an integer type of the same
7782 size, constructing a vector of those and then
7783 re-interpreting it as the original vector type.
7784 This avoids a huge runtime penalty due to the general
7785 inability to perform store forwarding from smaller stores
7786 to a larger load. */
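 /* For example (illustrative): with a V8SF vectype and GROUP_SIZE == 2,
 and provided the target supports the required vector mode, the code
 below performs four 64-bit integer loads, builds a four-element
 64-bit integer vector and VIEW_CONVERTs the result back to V8SF.  */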
7787 unsigned lsize
7788 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7789 elmode = int_mode_for_size (lsize, 0).require ();
7790 unsigned int lnunits = const_nunits / group_size;
7791 /* If we can't construct such a vector fall back to
7792 element loads of the original vector type. */
7793 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7794 && VECTOR_MODE_P (vmode)
7795 && targetm.vector_mode_supported_p (vmode)
7796 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7797 != CODE_FOR_nothing))
7799 nloads = lnunits;
7800 lnel = group_size;
7801 ltype = build_nonstandard_integer_type (lsize, 1);
7802 lvectype = build_vector_type (ltype, nloads);
7806 else
7808 nloads = 1;
7809 lnel = const_nunits;
7810 ltype = vectype;
7812 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7814 /* If the vectype is a single-element vector(1) scalar_type, load it as a whole. */
7815 else if (nloads == 1)
7816 ltype = vectype;
7818 if (slp)
7820 /* For SLP permutation support we need to load the whole group,
7821 not only the number of vector stmts the permutation result
7822 fits in. */
7823 if (slp_perm)
7825 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7826 variable VF. */
7827 unsigned int const_vf = vf.to_constant ();
7828 ncopies = CEIL (group_size * const_vf, const_nunits);
7829 dr_chain.create (ncopies);
7831 else
7832 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7834 unsigned int group_el = 0;
7835 unsigned HOST_WIDE_INT
7836 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7837 for (j = 0; j < ncopies; j++)
7839 if (nloads > 1)
7840 vec_alloc (v, nloads);
7841 stmt_vec_info new_stmt_info = NULL;
7842 for (i = 0; i < nloads; i++)
7844 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7845 group_el * elsz + cst_offset);
7846 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7847 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7848 gassign *new_stmt
7849 = gimple_build_assign (make_ssa_name (ltype), data_ref);
7850 new_stmt_info
7851 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7852 if (nloads > 1)
7853 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7854 gimple_assign_lhs (new_stmt));
7856 group_el += lnel;
7857 if (! slp
7858 || group_el == group_size)
7860 tree newoff = copy_ssa_name (running_off);
7861 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7862 running_off, stride_step);
7863 vect_finish_stmt_generation (stmt_info, incr, gsi);
7865 running_off = newoff;
7866 group_el = 0;
7869 if (nloads > 1)
7871 tree vec_inv = build_constructor (lvectype, v);
7872 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
7873 new_stmt_info = vinfo->lookup_def (new_temp);
7874 if (lvectype != vectype)
7876 gassign *new_stmt
7877 = gimple_build_assign (make_ssa_name (vectype),
7878 VIEW_CONVERT_EXPR,
7879 build1 (VIEW_CONVERT_EXPR,
7880 vectype, new_temp));
7881 new_stmt_info
7882 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7886 if (slp)
7888 if (slp_perm)
7889 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
7890 else
7891 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7893 else
7895 if (j == 0)
7896 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7897 else
7898 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7899 prev_stmt_info = new_stmt_info;
7902 if (slp_perm)
7904 unsigned n_perms;
7905 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7906 slp_node_instance, false, &n_perms);
7908 return true;
7911 if (memory_access_type == VMAT_GATHER_SCATTER
7912 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7913 grouped_load = false;
7915 if (grouped_load)
7917 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7918 group_size = DR_GROUP_SIZE (first_stmt_info);
7919 /* For SLP vectorization we directly vectorize a subchain
7920 without permutation. */
7921 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7922 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7923 /* For BB vectorization always use the first stmt to base
7924 the data ref pointer on. */
7925 if (bb_vinfo)
7926 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7928 /* Check if the chain of loads is already vectorized. */
7929 if (STMT_VINFO_VEC_STMT (first_stmt_info)
7930 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7931 ??? But we can only do so if there is exactly one
7932 as we have no way to get at the rest. Leave the CSE
7933 opportunity alone.
7934 ??? With the group load eventually participating
7935 in multiple different permutations (having multiple
7936 slp nodes which refer to the same group) the CSE
7937 is even wrong code. See PR56270. */
7938 && !slp)
7940 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7941 return true;
7943 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7944 group_gap_adj = 0;
7946 /* VEC_NUM is the number of vect stmts to be created for this group. */
7947 if (slp)
7949 grouped_load = false;
7950 /* If an SLP permutation is from N elements to N elements,
7951 and if one vector holds a whole number of N, we can load
7952 the inputs to the permutation in the same way as an
7953 unpermuted sequence. In other cases we need to load the
7954 whole group, not only the number of vector stmts the
7955 permutation result fits in. */
7956 if (slp_perm
7957 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
7958 || !multiple_p (nunits, group_size)))
7960 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
7961 variable VF; see vect_transform_slp_perm_load. */
7962 unsigned int const_vf = vf.to_constant ();
7963 unsigned int const_nunits = nunits.to_constant ();
7964 vec_num = CEIL (group_size * const_vf, const_nunits);
7965 group_gap_adj = vf * group_size - nunits * vec_num;
7967 else
7969 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7970 group_gap_adj
7971 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7974 else
7975 vec_num = group_size;
7977 ref_type = get_group_alias_ptr_type (first_stmt_info);
7979 else
7981 first_stmt_info = stmt_info;
7982 first_dr_info = dr_info;
7983 group_size = vec_num = 1;
7984 group_gap_adj = 0;
7985 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7988 alignment_support_scheme
7989 = vect_supportable_dr_alignment (first_dr_info, false);
7990 gcc_assert (alignment_support_scheme);
7991 vec_loop_masks *loop_masks
7992 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7993 ? &LOOP_VINFO_MASKS (loop_vinfo)
7994 : NULL);
7995 /* Targets with store-lane instructions must not require explicit
7996 realignment. vect_supportable_dr_alignment always returns either
7997 dr_aligned or dr_unaligned_supported for masked operations. */
7998 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7999 && !mask
8000 && !loop_masks)
8001 || alignment_support_scheme == dr_aligned
8002 || alignment_support_scheme == dr_unaligned_supported);
8004 /* In case the vectorization factor (VF) is bigger than the number
8005 of elements that we can fit in a vectype (nunits), we have to generate
8006 more than one vector stmt - i.e - we need to "unroll" the
8007 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8008 from one copy of the vector stmt to the next, in the field
8009 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8010 stages to find the correct vector defs to be used when vectorizing
8011 stmts that use the defs of the current stmt. The example below
8012 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8013 need to create 4 vectorized stmts):
8015 before vectorization:
8016 RELATED_STMT VEC_STMT
8017 S1: x = memref - -
8018 S2: z = x + 1 - -
8020 step 1: vectorize stmt S1:
8021 We first create the vector stmt VS1_0, and, as usual, record a
8022 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8023 Next, we create the vector stmt VS1_1, and record a pointer to
8024 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8025 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8026 stmts and pointers:
8027 RELATED_STMT VEC_STMT
8028 VS1_0: vx0 = memref0 VS1_1 -
8029 VS1_1: vx1 = memref1 VS1_2 -
8030 VS1_2: vx2 = memref2 VS1_3 -
8031 VS1_3: vx3 = memref3 - -
8032 S1: x = load - VS1_0
8033 S2: z = x + 1 - -
8035 See in documentation in vect_get_vec_def_for_stmt_copy for how the
8036 information we recorded in RELATED_STMT field is used to vectorize
8037 stmt S2. */
8039 /* In case of interleaving (non-unit grouped access):
8041 S1: x2 = &base + 2
8042 S2: x0 = &base
8043 S3: x1 = &base + 1
8044 S4: x3 = &base + 3
8046 Vectorized loads are created in the order of memory accesses
8047 starting from the access of the first stmt of the chain:
8049 VS1: vx0 = &base
8050 VS2: vx1 = &base + vec_size*1
8051 VS3: vx2 = &base + vec_size*2
8052 VS4: vx3 = &base + vec_size*3
8054 Then permutation statements are generated:
8056 VS5: vx4 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8057 VS6: vx5 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8060 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8061 (the order of the data-refs in the output of vect_permute_load_chain
8062 corresponds to the order of scalar stmts in the interleaving chain - see
8063 the documentation of vect_permute_load_chain()).
8064 The generation of permutation stmts and recording them in
8065 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8067 In case of both multiple types and interleaving, the vector loads and
8068 permutation stmts above are created for every copy. The result vector
8069 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8070 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8072 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8073 on a target that supports unaligned accesses (dr_unaligned_supported)
8074 we generate the following code:
8075 p = initial_addr;
8076 indx = 0;
8077 loop {
8078 p = p + indx * vectype_size;
8079 vec_dest = *(p);
8080 indx = indx + 1;
8083 Otherwise, the data reference is potentially unaligned on a target that
8084 does not support unaligned accesses (dr_explicit_realign_optimized) -
8085 then generate the following code, in which the data in each iteration is
8086 obtained by two vector loads, one from the previous iteration, and one
8087 from the current iteration:
8088 p1 = initial_addr;
8089 msq_init = *(floor(p1))
8090 p2 = initial_addr + VS - 1;
8091 realignment_token = call target_builtin;
8092 indx = 0;
8093 loop {
8094 p2 = p2 + indx * vectype_size
8095 lsq = *(floor(p2))
8096 vec_dest = realign_load (msq, lsq, realignment_token)
8097 indx = indx + 1;
8098 msq = lsq;
8099 } */
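 /* In the code below floor(x) is realized by masking the address with
 -ALIGN via a BIT_AND_EXPR, e.g. clearing the low four address bits
 for a 16-byte target alignment (illustrative).  */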
8101 /* If the misalignment remains the same throughout the execution of the
8102 loop, we can create the init_addr and permutation mask at the loop
8103 preheader. Otherwise, it needs to be created inside the loop.
8104 This can only occur when vectorizing memory accesses in the inner-loop
8105 nested within an outer-loop that is being vectorized. */
8107 if (nested_in_vect_loop
8108 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
8109 GET_MODE_SIZE (TYPE_MODE (vectype))))
8111 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8112 compute_in_loop = true;
8115 if ((alignment_support_scheme == dr_explicit_realign_optimized
8116 || alignment_support_scheme == dr_explicit_realign)
8117 && !compute_in_loop)
8119 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
8120 alignment_support_scheme, NULL_TREE,
8121 &at_loop);
8122 if (alignment_support_scheme == dr_explicit_realign_optimized)
8124 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8125 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8126 size_one_node);
8129 else
8130 at_loop = loop;
8132 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8133 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
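 /* For a reverse (negative-step) access the initial address is adjusted
 by NUNITS - 1 elements so that a single contiguous vector load covers
 the elements the scalar iterations access; the loaded vector is then
 reversed with a VEC_PERM_EXPR near the end of the copy loop below.  */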
8135 tree bump;
8136 tree vec_offset = NULL_TREE;
8137 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8139 aggr_type = NULL_TREE;
8140 bump = NULL_TREE;
8142 else if (memory_access_type == VMAT_GATHER_SCATTER)
8144 aggr_type = elem_type;
8145 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8146 &bump, &vec_offset);
8148 else
8150 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8151 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8152 else
8153 aggr_type = vectype;
8154 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8155 memory_access_type);
8158 tree vec_mask = NULL_TREE;
8159 prev_stmt_info = NULL;
8160 poly_uint64 group_elt = 0;
8161 for (j = 0; j < ncopies; j++)
8163 stmt_vec_info new_stmt_info = NULL;
8164 /* 1. Create the vector or array pointer update chain. */
8165 if (j == 0)
8167 bool simd_lane_access_p
8168 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8169 if (simd_lane_access_p
8170 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8171 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8172 && integer_zerop (DR_OFFSET (first_dr_info->dr))
8173 && integer_zerop (DR_INIT (first_dr_info->dr))
8174 && alias_sets_conflict_p (get_alias_set (aggr_type),
8175 get_alias_set (TREE_TYPE (ref_type)))
8176 && (alignment_support_scheme == dr_aligned
8177 || alignment_support_scheme == dr_unaligned_supported))
8179 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8180 dataref_offset = build_int_cst (ref_type, 0);
8182 else if (first_stmt_info_for_drptr
8183 && first_stmt_info != first_stmt_info_for_drptr)
8185 dataref_ptr
8186 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8187 aggr_type, at_loop, offset, &dummy,
8188 gsi, &ptr_incr, simd_lane_access_p,
8189 byte_offset, bump);
8190 /* Adjust the pointer by the difference to first_stmt. */
8191 data_reference_p ptrdr
8192 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
8193 tree diff
8194 = fold_convert (sizetype,
8195 size_binop (MINUS_EXPR,
8196 DR_INIT (first_dr_info->dr),
8197 DR_INIT (ptrdr)));
8198 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8199 stmt_info, diff);
8201 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8202 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8203 &dataref_ptr, &vec_offset);
8204 else
8205 dataref_ptr
8206 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
8207 offset, &dummy, gsi, &ptr_incr,
8208 simd_lane_access_p,
8209 byte_offset, bump);
8210 if (mask)
8211 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8212 mask_vectype);
8214 else
8216 if (dataref_offset)
8217 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8218 bump);
8219 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8220 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8221 else
8222 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8223 stmt_info, bump);
8224 if (mask)
8225 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8228 if (grouped_load || slp_perm)
8229 dr_chain.create (vec_num);
8231 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8233 tree vec_array;
8235 vec_array = create_vector_array (vectype, vec_num);
8237 tree final_mask = NULL_TREE;
8238 if (loop_masks)
8239 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8240 vectype, j);
8241 if (vec_mask)
8242 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8243 vec_mask, gsi);
8245 gcall *call;
8246 if (final_mask)
8248 /* Emit:
8249 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8250 VEC_MASK). */
8251 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8252 tree alias_ptr = build_int_cst (ref_type, align);
8253 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8254 dataref_ptr, alias_ptr,
8255 final_mask);
8257 else
8259 /* Emit:
8260 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8261 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8262 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8264 gimple_call_set_lhs (call, vec_array);
8265 gimple_call_set_nothrow (call, true);
8266 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8268 /* Extract each vector into an SSA_NAME. */
8269 for (i = 0; i < vec_num; i++)
8271 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
8272 vec_array, i);
8273 dr_chain.quick_push (new_temp);
8276 /* Record the mapping between SSA_NAMEs and statements. */
8277 vect_record_grouped_load_vectors (stmt_info, dr_chain);
8279 /* Record that VEC_ARRAY is now dead. */
8280 vect_clobber_variable (stmt_info, gsi, vec_array);
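 /* Roughly, the generated GIMPLE has the shape (illustrative,
 made-up names, VEC_NUM == 2):
 vect_array.7 = .LOAD_LANES (MEM <...> [(elem_type *) dataref_ptr]);
 vect__1 = vect_array.7[0];
 vect__2 = vect_array.7[1];
 vect_array.7 ={v} {CLOBBER};  */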
8282 else
8284 for (i = 0; i < vec_num; i++)
8286 tree final_mask = NULL_TREE;
8287 if (loop_masks
8288 && memory_access_type != VMAT_INVARIANT)
8289 final_mask = vect_get_loop_mask (gsi, loop_masks,
8290 vec_num * ncopies,
8291 vectype, vec_num * j + i);
8292 if (vec_mask)
8293 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8294 vec_mask, gsi);
8296 if (i > 0)
8297 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8298 stmt_info, bump);
8300 /* 2. Create the vector-load in the loop. */
8301 gimple *new_stmt = NULL;
8302 switch (alignment_support_scheme)
8304 case dr_aligned:
8305 case dr_unaligned_supported:
8307 unsigned int align, misalign;
8309 if (memory_access_type == VMAT_GATHER_SCATTER)
8311 tree scale = size_int (gs_info.scale);
8312 gcall *call;
8313 if (loop_masks)
8314 call = gimple_build_call_internal
8315 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8316 vec_offset, scale, final_mask);
8317 else
8318 call = gimple_build_call_internal
8319 (IFN_GATHER_LOAD, 3, dataref_ptr,
8320 vec_offset, scale);
8321 gimple_call_set_nothrow (call, true);
8322 new_stmt = call;
8323 data_ref = NULL_TREE;
8324 break;
8327 align = DR_TARGET_ALIGNMENT (dr_info);
8328 if (alignment_support_scheme == dr_aligned)
8330 gcc_assert (aligned_access_p (first_dr_info));
8331 misalign = 0;
8333 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8335 align = dr_alignment
8336 (vect_dr_behavior (first_dr_info));
8337 misalign = 0;
8339 else
8340 misalign = DR_MISALIGNMENT (first_dr_info);
8341 if (dataref_offset == NULL_TREE
8342 && TREE_CODE (dataref_ptr) == SSA_NAME)
8343 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8344 align, misalign);
8346 if (final_mask)
8348 align = least_bit_hwi (misalign | align);
8349 tree ptr = build_int_cst (ref_type, align);
8350 gcall *call
8351 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8352 dataref_ptr, ptr,
8353 final_mask);
8354 gimple_call_set_nothrow (call, true);
8355 new_stmt = call;
8356 data_ref = NULL_TREE;
8358 else
8360 data_ref
8361 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8362 dataref_offset
8363 ? dataref_offset
8364 : build_int_cst (ref_type, 0));
8365 if (alignment_support_scheme == dr_aligned)
8367 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8368 TREE_TYPE (data_ref)
8369 = build_aligned_type (TREE_TYPE (data_ref),
8370 align * BITS_PER_UNIT);
8371 else
8372 TREE_TYPE (data_ref)
8373 = build_aligned_type (TREE_TYPE (data_ref),
8374 TYPE_ALIGN (elem_type));
8376 break;
8378 case dr_explicit_realign:
8380 tree ptr, bump;
8382 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8384 if (compute_in_loop)
8385 msq = vect_setup_realignment (first_stmt_info, gsi,
8386 &realignment_token,
8387 dr_explicit_realign,
8388 dataref_ptr, NULL);
8390 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8391 ptr = copy_ssa_name (dataref_ptr);
8392 else
8393 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8394 unsigned int align = DR_TARGET_ALIGNMENT (first_dr_info);
8395 new_stmt = gimple_build_assign
8396 (ptr, BIT_AND_EXPR, dataref_ptr,
8397 build_int_cst
8398 (TREE_TYPE (dataref_ptr),
8399 -(HOST_WIDE_INT) align));
8400 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8401 data_ref
8402 = build2 (MEM_REF, vectype, ptr,
8403 build_int_cst (ref_type, 0));
8404 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8405 vec_dest = vect_create_destination_var (scalar_dest,
8406 vectype);
8407 new_stmt = gimple_build_assign (vec_dest, data_ref);
8408 new_temp = make_ssa_name (vec_dest, new_stmt);
8409 gimple_assign_set_lhs (new_stmt, new_temp);
8410 gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
8411 gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
8412 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8413 msq = new_temp;
8415 bump = size_binop (MULT_EXPR, vs,
8416 TYPE_SIZE_UNIT (elem_type));
8417 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8418 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
8419 stmt_info, bump);
8420 new_stmt = gimple_build_assign
8421 (NULL_TREE, BIT_AND_EXPR, ptr,
8422 build_int_cst
8423 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8424 ptr = copy_ssa_name (ptr, new_stmt);
8425 gimple_assign_set_lhs (new_stmt, ptr);
8426 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8427 data_ref
8428 = build2 (MEM_REF, vectype, ptr,
8429 build_int_cst (ref_type, 0));
8430 break;
8432 case dr_explicit_realign_optimized:
8434 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8435 new_temp = copy_ssa_name (dataref_ptr);
8436 else
8437 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8438 unsigned int align = DR_TARGET_ALIGNMENT (first_dr_info);
8439 new_stmt = gimple_build_assign
8440 (new_temp, BIT_AND_EXPR, dataref_ptr,
8441 build_int_cst (TREE_TYPE (dataref_ptr),
8442 -(HOST_WIDE_INT) align));
8443 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8444 data_ref
8445 = build2 (MEM_REF, vectype, new_temp,
8446 build_int_cst (ref_type, 0));
8447 break;
8449 default:
8450 gcc_unreachable ();
8452 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8453 /* DATA_REF is null if we've already built the statement. */
8454 if (data_ref)
8456 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8457 new_stmt = gimple_build_assign (vec_dest, data_ref);
8459 new_temp = make_ssa_name (vec_dest, new_stmt);
8460 gimple_set_lhs (new_stmt, new_temp);
8461 new_stmt_info
8462 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8464 /* 3. Handle explicit realignment if necessary/supported.
8465 Create in loop:
8466 vec_dest = realign_load (msq, lsq, realignment_token) */
8467 if (alignment_support_scheme == dr_explicit_realign_optimized
8468 || alignment_support_scheme == dr_explicit_realign)
8470 lsq = gimple_assign_lhs (new_stmt);
8471 if (!realignment_token)
8472 realignment_token = dataref_ptr;
8473 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8474 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8475 msq, lsq, realignment_token);
8476 new_temp = make_ssa_name (vec_dest, new_stmt);
8477 gimple_assign_set_lhs (new_stmt, new_temp);
8478 new_stmt_info
8479 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8481 if (alignment_support_scheme == dr_explicit_realign_optimized)
8483 gcc_assert (phi);
8484 if (i == vec_num - 1 && j == ncopies - 1)
8485 add_phi_arg (phi, lsq,
8486 loop_latch_edge (containing_loop),
8487 UNKNOWN_LOCATION);
8488 msq = lsq;
8492 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8494 tree perm_mask = perm_mask_for_reverse (vectype);
8495 new_temp = permute_vec_elements (new_temp, new_temp,
8496 perm_mask, stmt_info, gsi);
8497 new_stmt_info = vinfo->lookup_def (new_temp);
8500 /* Collect vector loads and later create their permutation in
8501 vect_transform_grouped_load (). */
8502 if (grouped_load || slp_perm)
8503 dr_chain.quick_push (new_temp);
8505 /* Store vector loads in the corresponding SLP_NODE. */
8506 if (slp && !slp_perm)
8507 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8509 /* With an SLP permutation we load the gaps as well; without
8510 one we need to skip the gaps after we manage to fully load
8511 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8512 group_elt += nunits;
8513 if (maybe_ne (group_gap_adj, 0U)
8514 && !slp_perm
8515 && known_eq (group_elt, group_size - group_gap_adj))
8517 poly_wide_int bump_val
8518 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8519 * group_gap_adj);
8520 tree bump = wide_int_to_tree (sizetype, bump_val);
8521 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8522 stmt_info, bump);
8523 group_elt = 0;
8526 /* Bump the vector pointer to account for a gap or for excess
8527 elements loaded for a permuted SLP load. */
8528 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8530 poly_wide_int bump_val
8531 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8532 * group_gap_adj);
8533 tree bump = wide_int_to_tree (sizetype, bump_val);
8534 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8535 stmt_info, bump);
8539 if (slp && !slp_perm)
8540 continue;
8542 if (slp_perm)
8544 unsigned n_perms;
8545 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8546 slp_node_instance, false,
8547 &n_perms))
8549 dr_chain.release ();
8550 return false;
8553 else
8555 if (grouped_load)
8557 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8558 vect_transform_grouped_load (stmt_info, dr_chain,
8559 group_size, gsi);
8560 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8562 else
8564 if (j == 0)
8565 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8566 else
8567 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8568 prev_stmt_info = new_stmt_info;
8571 dr_chain.release ();
8574 return true;
8577 /* Function vect_is_simple_cond.
8579 Input:
8580 LOOP - the loop that is being vectorized.
8581 COND - Condition that is checked for simple use.
8583 Output:
8584 *COMP_VECTYPE - the vector type for the comparison.
8585 *DTS - The def types for the arguments of the comparison.
8587 Returns whether a COND can be vectorized. Checks whether
8588 condition operands are supportable using vect_is_simple_use. */
8590 static bool
8591 vect_is_simple_cond (tree cond, vec_info *vinfo,
8592 tree *comp_vectype, enum vect_def_type *dts,
8593 tree vectype)
8595 tree lhs, rhs;
8596 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8598 /* Mask case. */
8599 if (TREE_CODE (cond) == SSA_NAME
8600 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8602 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8603 || !*comp_vectype
8604 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8605 return false;
8606 return true;
8609 if (!COMPARISON_CLASS_P (cond))
8610 return false;
8612 lhs = TREE_OPERAND (cond, 0);
8613 rhs = TREE_OPERAND (cond, 1);
8615 if (TREE_CODE (lhs) == SSA_NAME)
8617 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8618 return false;
8620 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8621 || TREE_CODE (lhs) == FIXED_CST)
8622 dts[0] = vect_constant_def;
8623 else
8624 return false;
8626 if (TREE_CODE (rhs) == SSA_NAME)
8628 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8629 return false;
8631 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8632 || TREE_CODE (rhs) == FIXED_CST)
8633 dts[1] = vect_constant_def;
8634 else
8635 return false;
8637 if (vectype1 && vectype2
8638 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8639 TYPE_VECTOR_SUBPARTS (vectype2)))
8640 return false;
8642 *comp_vectype = vectype1 ? vectype1 : vectype2;
8643 /* Invariant comparison. */
8644 if (! *comp_vectype && vectype)
8646 tree scalar_type = TREE_TYPE (lhs);
8647 /* If we can widen the comparison to match vectype do so. */
8648 if (INTEGRAL_TYPE_P (scalar_type)
8649 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8650 TYPE_SIZE (TREE_TYPE (vectype))))
8651 scalar_type = build_nonstandard_integer_type
8652 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8653 TYPE_UNSIGNED (scalar_type));
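 /* E.g. (illustrative) a comparison of two invariant 32-bit values
 feeding a COND_EXPR with 64-bit vector elements is widened to a
 64-bit comparison here, so COMP_VECTYPE below gets the same number
 of subparts as VECTYPE.  */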
8654 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8657 return true;
8660 /* vectorizable_condition.
8662 Check if STMT_INFO is a conditional modify expression that can be vectorized.
8663 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8664 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8665 at GSI.
8667 When STMT_INFO is vectorized as a nested cycle, REDUC_DEF is the vector
8668 variable to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1,
8669 and in the else clause if it is 2).
8671 Return true if STMT_INFO is vectorizable in this way. */
8673 bool
8674 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8675 stmt_vec_info *vec_stmt, tree reduc_def,
8676 int reduc_index, slp_tree slp_node,
8677 stmt_vector_for_cost *cost_vec)
8679 vec_info *vinfo = stmt_info->vinfo;
8680 tree scalar_dest = NULL_TREE;
8681 tree vec_dest = NULL_TREE;
8682 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8683 tree then_clause, else_clause;
8684 tree comp_vectype = NULL_TREE;
8685 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8686 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8687 tree vec_compare;
8688 tree new_temp;
8689 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8690 enum vect_def_type dts[4]
8691 = {vect_unknown_def_type, vect_unknown_def_type,
8692 vect_unknown_def_type, vect_unknown_def_type};
8693 int ndts = 4;
8694 int ncopies;
8695 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8696 stmt_vec_info prev_stmt_info = NULL;
8697 int i, j;
8698 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8699 vec<tree> vec_oprnds0 = vNULL;
8700 vec<tree> vec_oprnds1 = vNULL;
8701 vec<tree> vec_oprnds2 = vNULL;
8702 vec<tree> vec_oprnds3 = vNULL;
8703 tree vec_cmp_type;
8704 bool masked = false;
8706 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8707 return false;
8709 vect_reduction_type reduction_type
8710 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8711 if (reduction_type == TREE_CODE_REDUCTION)
8713 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8714 return false;
8716 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8717 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8718 && reduc_def))
8719 return false;
8721 /* FORNOW: not yet supported. */
8722 if (STMT_VINFO_LIVE_P (stmt_info))
8724 if (dump_enabled_p ())
8725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8726 "value used after loop.\n");
8727 return false;
8731 /* Is this a vectorizable conditional operation? */
8732 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
8733 if (!stmt)
8734 return false;
8736 code = gimple_assign_rhs_code (stmt);
8738 if (code != COND_EXPR)
8739 return false;
8741 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8742 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8744 if (slp_node)
8745 ncopies = 1;
8746 else
8747 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8749 gcc_assert (ncopies >= 1);
8750 if (reduc_index && ncopies > 1)
8751 return false; /* FORNOW */
8753 cond_expr = gimple_assign_rhs1 (stmt);
8754 then_clause = gimple_assign_rhs2 (stmt);
8755 else_clause = gimple_assign_rhs3 (stmt);
8757 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8758 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8759 || !comp_vectype)
8760 return false;
8762 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8763 return false;
8764 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8765 return false;
8767 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8768 return false;
8770 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8771 return false;
8773 masked = !COMPARISON_CLASS_P (cond_expr);
8774 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8776 if (vec_cmp_type == NULL_TREE)
8777 return false;
8779 cond_code = TREE_CODE (cond_expr);
8780 if (!masked)
8782 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8783 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8786 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8788 /* Boolean values may have another representation in vectors
8789 and therefore we prefer bit operations over comparison for
8790 them (which also works for scalar masks). We store opcodes
8791 to use in bitop1 and bitop2. Statement is vectorized as
8792 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8793 depending on bitop1 and bitop2 arity. */
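 /* For example, on boolean operands GT becomes lhs & ~rhs and GE
 becomes lhs | ~rhs; LT and LE swap the operands first, NE uses a
 plain XOR, and EQ uses XOR and then swaps the then/else clauses
 instead of negating the result.  */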
8794 switch (cond_code)
8796 case GT_EXPR:
8797 bitop1 = BIT_NOT_EXPR;
8798 bitop2 = BIT_AND_EXPR;
8799 break;
8800 case GE_EXPR:
8801 bitop1 = BIT_NOT_EXPR;
8802 bitop2 = BIT_IOR_EXPR;
8803 break;
8804 case LT_EXPR:
8805 bitop1 = BIT_NOT_EXPR;
8806 bitop2 = BIT_AND_EXPR;
8807 std::swap (cond_expr0, cond_expr1);
8808 break;
8809 case LE_EXPR:
8810 bitop1 = BIT_NOT_EXPR;
8811 bitop2 = BIT_IOR_EXPR;
8812 std::swap (cond_expr0, cond_expr1);
8813 break;
8814 case NE_EXPR:
8815 bitop1 = BIT_XOR_EXPR;
8816 break;
8817 case EQ_EXPR:
8818 bitop1 = BIT_XOR_EXPR;
8819 bitop2 = BIT_NOT_EXPR;
8820 break;
8821 default:
8822 return false;
8824 cond_code = SSA_NAME;
8827 if (!vec_stmt)
8829 if (bitop1 != NOP_EXPR)
8831 machine_mode mode = TYPE_MODE (comp_vectype);
8832 optab optab;
8834 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8835 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8836 return false;
8838 if (bitop2 != NOP_EXPR)
8840 optab = optab_for_tree_code (bitop2, comp_vectype,
8841 optab_default);
8842 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8843 return false;
8846 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8847 cond_code))
8849 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8850 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8851 cost_vec);
8852 return true;
8854 return false;
8857 /* Transform. */
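 /* Illustratively (made-up names), in the non-EXTRACT_LAST_REDUCTION
 case a scalar statement
 x_1 = a_2 < b_3 ? c_4 : d_5;
 is replaced by a vector statement of the form
 vect_x = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;
 with the comparison possibly rewritten into the bit operations
 selected above when the operands are booleans.  */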
8859 if (!slp_node)
8861 vec_oprnds0.create (1);
8862 vec_oprnds1.create (1);
8863 vec_oprnds2.create (1);
8864 vec_oprnds3.create (1);
8867 /* Handle def. */
8868 scalar_dest = gimple_assign_lhs (stmt);
8869 if (reduction_type != EXTRACT_LAST_REDUCTION)
8870 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8872 /* Handle cond expr. */
8873 for (j = 0; j < ncopies; j++)
8875 stmt_vec_info new_stmt_info = NULL;
8876 if (j == 0)
8878 if (slp_node)
8880 auto_vec<tree, 4> ops;
8881 auto_vec<vec<tree>, 4> vec_defs;
8883 if (masked)
8884 ops.safe_push (cond_expr);
8885 else
8887 ops.safe_push (cond_expr0);
8888 ops.safe_push (cond_expr1);
8890 ops.safe_push (then_clause);
8891 ops.safe_push (else_clause);
8892 vect_get_slp_defs (ops, slp_node, &vec_defs);
8893 vec_oprnds3 = vec_defs.pop ();
8894 vec_oprnds2 = vec_defs.pop ();
8895 if (!masked)
8896 vec_oprnds1 = vec_defs.pop ();
8897 vec_oprnds0 = vec_defs.pop ();
8899 else
8901 if (masked)
8903 vec_cond_lhs
8904 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
8905 comp_vectype);
8906 vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
8908 else
8910 vec_cond_lhs
8911 = vect_get_vec_def_for_operand (cond_expr0,
8912 stmt_info, comp_vectype);
8913 vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);
8915 vec_cond_rhs
8916 = vect_get_vec_def_for_operand (cond_expr1,
8917 stmt_info, comp_vectype);
8918 vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
8920 if (reduc_index == 1)
8921 vec_then_clause = reduc_def;
8922 else
8924 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8925 stmt_info);
8926 vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
8928 if (reduc_index == 2)
8929 vec_else_clause = reduc_def;
8930 else
8932 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8933 stmt_info);
8934 vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
8938 else
8940 vec_cond_lhs
8941 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
8942 if (!masked)
8943 vec_cond_rhs
8944 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
8946 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
8947 vec_oprnds2.pop ());
8948 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
8949 vec_oprnds3.pop ());
8952 if (!slp_node)
8954 vec_oprnds0.quick_push (vec_cond_lhs);
8955 if (!masked)
8956 vec_oprnds1.quick_push (vec_cond_rhs);
8957 vec_oprnds2.quick_push (vec_then_clause);
8958 vec_oprnds3.quick_push (vec_else_clause);
8961 /* Arguments are ready. Create the new vector stmt. */
8962 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8964 vec_then_clause = vec_oprnds2[i];
8965 vec_else_clause = vec_oprnds3[i];
8967 if (masked)
8968 vec_compare = vec_cond_lhs;
8969 else
8971 vec_cond_rhs = vec_oprnds1[i];
8972 if (bitop1 == NOP_EXPR)
8973 vec_compare = build2 (cond_code, vec_cmp_type,
8974 vec_cond_lhs, vec_cond_rhs);
8975 else
8977 new_temp = make_ssa_name (vec_cmp_type);
8978 gassign *new_stmt;
8979 if (bitop1 == BIT_NOT_EXPR)
8980 new_stmt = gimple_build_assign (new_temp, bitop1,
8981 vec_cond_rhs);
8982 else
8983 new_stmt
8984 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8985 vec_cond_rhs);
8986 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8987 if (bitop2 == NOP_EXPR)
8988 vec_compare = new_temp;
8989 else if (bitop2 == BIT_NOT_EXPR)
8991 /* Instead of doing ~x ? y : z do x ? z : y. */
8992 vec_compare = new_temp;
8993 std::swap (vec_then_clause, vec_else_clause);
8995 else
8997 vec_compare = make_ssa_name (vec_cmp_type);
8998 new_stmt
8999 = gimple_build_assign (vec_compare, bitop2,
9000 vec_cond_lhs, new_temp);
9001 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9005 if (reduction_type == EXTRACT_LAST_REDUCTION)
9007 if (!is_gimple_val (vec_compare))
9009 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9010 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9011 vec_compare);
9012 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9013 vec_compare = vec_compare_name;
9015 gcc_assert (reduc_index == 2);
9016 gcall *new_stmt = gimple_build_call_internal
9017 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9018 vec_then_clause);
9019 gimple_call_set_lhs (new_stmt, scalar_dest);
9020 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9021 if (stmt_info->stmt == gsi_stmt (*gsi))
9022 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
9023 else
9025 /* In this case we're moving the definition to later in the
9026 block. That doesn't matter because the only uses of the
9027 lhs are in phi statements. */
9028 gimple_stmt_iterator old_gsi
9029 = gsi_for_stmt (stmt_info->stmt);
9030 gsi_remove (&old_gsi, true);
9031 new_stmt_info
9032 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9035 else
9037 new_temp = make_ssa_name (vec_dest);
9038 gassign *new_stmt
9039 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9040 vec_then_clause, vec_else_clause);
9041 new_stmt_info
9042 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9044 if (slp_node)
9045 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9048 if (slp_node)
9049 continue;
9051 if (j == 0)
9052 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9053 else
9054 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9056 prev_stmt_info = new_stmt_info;
9059 vec_oprnds0.release ();
9060 vec_oprnds1.release ();
9061 vec_oprnds2.release ();
9062 vec_oprnds3.release ();
9064 return true;
9067 /* vectorizable_comparison.
9069 Check if STMT_INFO is a comparison expression that can be vectorized.
9070 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9071 comparison, put it in VEC_STMT, and insert it at GSI.
9073 Return true if STMT_INFO is vectorizable in this way. */
9075 static bool
9076 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9077 stmt_vec_info *vec_stmt, tree reduc_def,
9078 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9080 vec_info *vinfo = stmt_info->vinfo;
9081 tree lhs, rhs1, rhs2;
9082 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9083 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9084 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9085 tree new_temp;
9086 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9087 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9088 int ndts = 2;
9089 poly_uint64 nunits;
9090 int ncopies;
9091 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9092 stmt_vec_info prev_stmt_info = NULL;
9093 int i, j;
9094 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9095 vec<tree> vec_oprnds0 = vNULL;
9096 vec<tree> vec_oprnds1 = vNULL;
9097 tree mask_type;
9098 tree mask;
9100 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9101 return false;
9103 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9104 return false;
9106 mask_type = vectype;
9107 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9109 if (slp_node)
9110 ncopies = 1;
9111 else
9112 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9114 gcc_assert (ncopies >= 1);
9115 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9116 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9117 && reduc_def))
9118 return false;
9120 if (STMT_VINFO_LIVE_P (stmt_info))
9122 if (dump_enabled_p ())
9123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9124 "value used after loop.\n");
9125 return false;
9128 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9129 if (!stmt)
9130 return false;
9132 code = gimple_assign_rhs_code (stmt);
9134 if (TREE_CODE_CLASS (code) != tcc_comparison)
9135 return false;
9137 rhs1 = gimple_assign_rhs1 (stmt);
9138 rhs2 = gimple_assign_rhs2 (stmt);
9140 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9141 return false;
9143 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9144 return false;
9146 if (vectype1 && vectype2
9147 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9148 TYPE_VECTOR_SUBPARTS (vectype2)))
9149 return false;
9151 vectype = vectype1 ? vectype1 : vectype2;
9153 /* Invariant comparison. */
9154 if (!vectype)
9156 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9157 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9158 return false;
9160 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9161 return false;
9163 /* Can't compare mask and non-mask types. */
9164 if (vectype1 && vectype2
9165 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9166 return false;
9168 /* Boolean values may have another representation in vectors
9169 and therefore we prefer bit operations over comparison for
9170 them (which also works for scalar masks). We store opcodes
9171 to use in bitop1 and bitop2. Statement is vectorized as
9172 BITOP2 (rhs1 BITOP1 rhs2) or
9173 rhs1 BITOP2 (BITOP1 rhs2)
9174 depending on bitop1 and bitop2 arity. */
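	   /* As a hedged illustration (not part of the original comment, but
	      derived from the bitop choices just below), boolean comparisons
	      end up rewritten as:
		rhs1 >  rhs2  ->  rhs1 & ~rhs2
		rhs1 >= rhs2  ->  rhs1 | ~rhs2
		rhs1 <  rhs2  ->  rhs2 & ~rhs1   (operands swapped)
		rhs1 <= rhs2  ->  rhs2 | ~rhs1   (operands swapped)
		rhs1 != rhs2  ->  rhs1 ^ rhs2
		rhs1 == rhs2  ->  ~(rhs1 ^ rhs2)  */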
9175 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9177 if (code == GT_EXPR)
9179 bitop1 = BIT_NOT_EXPR;
9180 bitop2 = BIT_AND_EXPR;
9182 else if (code == GE_EXPR)
9184 bitop1 = BIT_NOT_EXPR;
9185 bitop2 = BIT_IOR_EXPR;
9187 else if (code == LT_EXPR)
9189 bitop1 = BIT_NOT_EXPR;
9190 bitop2 = BIT_AND_EXPR;
9191 std::swap (rhs1, rhs2);
9192 std::swap (dts[0], dts[1]);
9194 else if (code == LE_EXPR)
9196 bitop1 = BIT_NOT_EXPR;
9197 bitop2 = BIT_IOR_EXPR;
9198 std::swap (rhs1, rhs2);
9199 std::swap (dts[0], dts[1]);
9201 else
9203 bitop1 = BIT_XOR_EXPR;
9204 if (code == EQ_EXPR)
9205 bitop2 = BIT_NOT_EXPR;
9209 if (!vec_stmt)
9211 if (bitop1 == NOP_EXPR)
9213 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9214 return false;
9216 else
9218 machine_mode mode = TYPE_MODE (vectype);
9219 optab optab;
9221 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9222 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9223 return false;
9225 if (bitop2 != NOP_EXPR)
9227 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9228 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9229 return false;
9233 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9234 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9235 dts, ndts, slp_node, cost_vec);
9236 return true;
9239 /* Transform. */
9240 if (!slp_node)
9242 vec_oprnds0.create (1);
9243 vec_oprnds1.create (1);
9246 /* Handle def. */
9247 lhs = gimple_assign_lhs (stmt);
9248 mask = vect_create_destination_var (lhs, mask_type);
9250 /* Handle cmp expr. */
9251 for (j = 0; j < ncopies; j++)
9253 stmt_vec_info new_stmt_info = NULL;
9254 if (j == 0)
9256 if (slp_node)
9258 auto_vec<tree, 2> ops;
9259 auto_vec<vec<tree>, 2> vec_defs;
9261 ops.safe_push (rhs1);
9262 ops.safe_push (rhs2);
9263 vect_get_slp_defs (ops, slp_node, &vec_defs);
9264 vec_oprnds1 = vec_defs.pop ();
9265 vec_oprnds0 = vec_defs.pop ();
9267 else
9269 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
9270 vectype);
9271 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
9272 vectype);
9275 else
9277 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
9278 vec_oprnds0.pop ());
9279 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
9280 vec_oprnds1.pop ());
9283 if (!slp_node)
9285 vec_oprnds0.quick_push (vec_rhs1);
9286 vec_oprnds1.quick_push (vec_rhs2);
9289 /* Arguments are ready. Create the new vector stmt. */
9290 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9292 vec_rhs2 = vec_oprnds1[i];
9294 new_temp = make_ssa_name (mask);
9295 if (bitop1 == NOP_EXPR)
9297 gassign *new_stmt = gimple_build_assign (new_temp, code,
9298 vec_rhs1, vec_rhs2);
9299 new_stmt_info
9300 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9302 else
9304 gassign *new_stmt;
9305 if (bitop1 == BIT_NOT_EXPR)
9306 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9307 else
9308 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9309 vec_rhs2);
9310 new_stmt_info
9311 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9312 if (bitop2 != NOP_EXPR)
9314 tree res = make_ssa_name (mask);
9315 if (bitop2 == BIT_NOT_EXPR)
9316 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9317 else
9318 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9319 new_temp);
9320 new_stmt_info
9321 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9324 if (slp_node)
9325 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9328 if (slp_node)
9329 continue;
9331 if (j == 0)
9332 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9333 else
9334 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9336 prev_stmt_info = new_stmt_info;
9339 vec_oprnds0.release ();
9340 vec_oprnds1.release ();
9342 return true;
9345 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9346 can handle all live statements in the node. Otherwise return true
9347 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9348 GSI and VEC_STMT are as for vectorizable_live_operation. */
9350 static bool
9351 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9352 slp_tree slp_node, stmt_vec_info *vec_stmt,
9353 stmt_vector_for_cost *cost_vec)
9355 if (slp_node)
9357 stmt_vec_info slp_stmt_info;
9358 unsigned int i;
9359 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
9361 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9362 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
9363 vec_stmt, cost_vec))
9364 return false;
9367 else if (STMT_VINFO_LIVE_P (stmt_info)
9368 && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
9369 vec_stmt, cost_vec))
9370 return false;
9372 return true;
9375 /* Make sure the statement is vectorizable. */
9377 opt_result
9378 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
9379 slp_tree node, slp_instance node_instance,
9380 stmt_vector_for_cost *cost_vec)
9382 vec_info *vinfo = stmt_info->vinfo;
9383 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9384 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9385 bool ok;
9386 gimple_seq pattern_def_seq;
9388 if (dump_enabled_p ())
9389 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
9390 stmt_info->stmt);
9392 if (gimple_has_volatile_ops (stmt_info->stmt))
9393 return opt_result::failure_at (stmt_info->stmt,
9394 "not vectorized:"
9395 " stmt has volatile operands: %G\n",
9396 stmt_info->stmt);
9398 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9399 && node == NULL
9400 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9402 gimple_stmt_iterator si;
9404 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9406 stmt_vec_info pattern_def_stmt_info
9407 = vinfo->lookup_stmt (gsi_stmt (si));
9408 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9409 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9411 /* Analyze def stmt of STMT if it's a pattern stmt. */
9412 if (dump_enabled_p ())
9413 dump_printf_loc (MSG_NOTE, vect_location,
9414 "==> examining pattern def statement: %G",
9415 pattern_def_stmt_info->stmt);
9417 opt_result res
9418 = vect_analyze_stmt (pattern_def_stmt_info,
9419 need_to_vectorize, node, node_instance,
9420 cost_vec);
9421 if (!res)
9422 return res;
9427 /* Skip stmts that do not need to be vectorized. In loops this is expected
9428 to include:
9429 - the COND_EXPR which is the loop exit condition
9430 - any LABEL_EXPRs in the loop
9431 - computations that are used only for array indexing or loop control.
9432 In basic blocks we only analyze statements that are a part of some SLP
9433      instance; therefore all the statements are relevant.
9435      A pattern statement needs to be analyzed instead of the original statement
9436      if the original statement is not relevant.  Otherwise, we analyze both
9437      statements.  In basic blocks we are called from some SLP instance
9438      traversal; there we don't analyze pattern stmts separately, since the
9439      pattern stmts will already be part of the SLP instance.  */
9441 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9442 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9443 && !STMT_VINFO_LIVE_P (stmt_info))
9445 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9446 && pattern_stmt_info
9447 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9448 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9450 /* Analyze PATTERN_STMT instead of the original stmt. */
9451 stmt_info = pattern_stmt_info;
9452 if (dump_enabled_p ())
9453 dump_printf_loc (MSG_NOTE, vect_location,
9454 "==> examining pattern statement: %G",
9455 stmt_info->stmt);
9457 else
9459 if (dump_enabled_p ())
9460 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9462 return opt_result::success ();
9465 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9466 && node == NULL
9467 && pattern_stmt_info
9468 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9469 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9471 /* Analyze PATTERN_STMT too. */
9472 if (dump_enabled_p ())
9473 dump_printf_loc (MSG_NOTE, vect_location,
9474 "==> examining pattern statement: %G",
9475 pattern_stmt_info->stmt);
9477 opt_result res
9478 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
9479 node_instance, cost_vec);
9480 if (!res)
9481 return res;
9484 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9486 case vect_internal_def:
9487 break;
9489 case vect_reduction_def:
9490 case vect_nested_cycle:
9491 gcc_assert (!bb_vinfo
9492 && (relevance == vect_used_in_outer
9493 || relevance == vect_used_in_outer_by_reduction
9494 || relevance == vect_used_by_reduction
9495 || relevance == vect_unused_in_scope
9496 || relevance == vect_used_only_live));
9497 break;
9499 case vect_induction_def:
9500 gcc_assert (!bb_vinfo);
9501 break;
9503 case vect_constant_def:
9504 case vect_external_def:
9505 case vect_unknown_def_type:
9506 default:
9507 gcc_unreachable ();
9510 if (STMT_VINFO_RELEVANT_P (stmt_info))
9512 tree type = gimple_expr_type (stmt_info->stmt);
9513 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
9514 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9515 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9516 || (call && gimple_call_lhs (call) == NULL_TREE));
9517 *need_to_vectorize = true;
9520 if (PURE_SLP_STMT (stmt_info) && !node)
9522 dump_printf_loc (MSG_NOTE, vect_location,
9523 "handled only by SLP analysis\n");
9524 return opt_result::success ();
9527 ok = true;
9528 if (!bb_vinfo
9529 && (STMT_VINFO_RELEVANT_P (stmt_info)
9530 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9531 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
9532 -mveclibabi= takes preference over library functions with
9533 the simd attribute. */
9534 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9535 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9536 cost_vec)
9537 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
9538 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9539 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
9540 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9541 cost_vec)
9542 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9543 || vectorizable_reduction (stmt_info, NULL, NULL, node,
9544 node_instance, cost_vec)
9545 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
9546 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9547 || vectorizable_condition (stmt_info, NULL, NULL, NULL, 0, node,
9548 cost_vec)
9549 || vectorizable_comparison (stmt_info, NULL, NULL, NULL, node,
9550 cost_vec));
9551 else
9553 if (bb_vinfo)
9554 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9555 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9556 cost_vec)
9557 || vectorizable_conversion (stmt_info, NULL, NULL, node,
9558 cost_vec)
9559 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9560 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9561 || vectorizable_assignment (stmt_info, NULL, NULL, node,
9562 cost_vec)
9563 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9564 cost_vec)
9565 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9566 || vectorizable_condition (stmt_info, NULL, NULL, NULL, 0, node,
9567 cost_vec)
9568 || vectorizable_comparison (stmt_info, NULL, NULL, NULL, node,
9569 cost_vec));
9572 if (!ok)
9573 return opt_result::failure_at (stmt_info->stmt,
9574 "not vectorized:"
9575 " relevant stmt not supported: %G",
9576 stmt_info->stmt);
9578 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9579 need extra handling, except for vectorizable reductions. */
9580 if (!bb_vinfo
9581 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9582 && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
9583 return opt_result::failure_at (stmt_info->stmt,
9584 "not vectorized:"
9585 " live stmt not supported: %G",
9586 stmt_info->stmt);
9588 return opt_result::success ();
9592 /* Function vect_transform_stmt.
9594 Create a vectorized stmt to replace STMT_INFO, and insert it at BSI. */
9596 bool
9597 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9598 slp_tree slp_node, slp_instance slp_node_instance)
9600 vec_info *vinfo = stmt_info->vinfo;
9601 bool is_store = false;
9602 stmt_vec_info vec_stmt = NULL;
9603 bool done;
9605 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9606 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
9608 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9609 && nested_in_vect_loop_p
9610 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9611 stmt_info));
9613 gimple *stmt = stmt_info->stmt;
9614 switch (STMT_VINFO_TYPE (stmt_info))
9616 case type_demotion_vec_info_type:
9617 case type_promotion_vec_info_type:
9618 case type_conversion_vec_info_type:
9619 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
9620 NULL);
9621 gcc_assert (done);
9622 break;
9624 case induc_vec_info_type:
9625 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
9626 NULL);
9627 gcc_assert (done);
9628 break;
9630 case shift_vec_info_type:
9631 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9632 gcc_assert (done);
9633 break;
9635 case op_vec_info_type:
9636 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
9637 NULL);
9638 gcc_assert (done);
9639 break;
9641 case assignment_vec_info_type:
9642 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
9643 NULL);
9644 gcc_assert (done);
9645 break;
9647 case load_vec_info_type:
9648 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
9649 slp_node_instance, NULL);
9650 gcc_assert (done);
9651 break;
9653 case store_vec_info_type:
9654 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9655 gcc_assert (done);
9656 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9658 /* In case of interleaving, the whole chain is vectorized when the
9659 last store in the chain is reached. Store stmts before the last
9660            one are skipped, and their vec_stmt_info shouldn't be freed
9661 meanwhile. */
9662 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9663 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9664 is_store = true;
9666 else
9667 is_store = true;
9668 break;
9670 case condition_vec_info_type:
9671 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, NULL, 0,
9672 slp_node, NULL);
9673 gcc_assert (done);
9674 break;
9676 case comparison_vec_info_type:
9677 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt, NULL,
9678 slp_node, NULL);
9679 gcc_assert (done);
9680 break;
9682 case call_vec_info_type:
9683 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9684 stmt = gsi_stmt (*gsi);
9685 break;
9687 case call_simd_clone_vec_info_type:
9688 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
9689 slp_node, NULL);
9690 stmt = gsi_stmt (*gsi);
9691 break;
9693 case reduc_vec_info_type:
9694 done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
9695 slp_node_instance, NULL);
9696 gcc_assert (done);
9697 break;
9699 default:
9700 if (!STMT_VINFO_LIVE_P (stmt_info))
9702 if (dump_enabled_p ())
9703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9704 "stmt not supported.\n");
9705 gcc_unreachable ();
9709 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9710 This would break hybrid SLP vectorization. */
9711 if (slp_node)
9712 gcc_assert (!vec_stmt
9713 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
9715 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9716 is being vectorized, but outside the immediately enclosing loop. */
9717 if (vec_stmt
9718 && nested_p
9719 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9720 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9721 || STMT_VINFO_RELEVANT (stmt_info) ==
9722 vect_used_in_outer_by_reduction))
9724 struct loop *innerloop = LOOP_VINFO_LOOP (
9725 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9726 imm_use_iterator imm_iter;
9727 use_operand_p use_p;
9728 tree scalar_dest;
9730 if (dump_enabled_p ())
9731 dump_printf_loc (MSG_NOTE, vect_location,
9732 "Record the vdef for outer-loop vectorization.\n");
9734       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9735 (to be used when vectorizing outer-loop stmts that use the DEF of
9736 STMT). */
9737 if (gimple_code (stmt) == GIMPLE_PHI)
9738 scalar_dest = PHI_RESULT (stmt);
9739 else
9740 scalar_dest = gimple_get_lhs (stmt);
9742 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9743 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9745 stmt_vec_info exit_phi_info
9746 = vinfo->lookup_stmt (USE_STMT (use_p));
9747 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9751 /* Handle stmts whose DEF is used outside the loop-nest that is
9752 being vectorized. */
9753 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9755 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
9756 NULL);
9757 gcc_assert (done);
9760 if (vec_stmt)
9761 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9763 return is_store;
9767 /* Remove a group of stores (for SLP or interleaving), free their
9768 stmt_vec_info. */
9770 void
9771 vect_remove_stores (stmt_vec_info first_stmt_info)
9773 vec_info *vinfo = first_stmt_info->vinfo;
9774 stmt_vec_info next_stmt_info = first_stmt_info;
9776 while (next_stmt_info)
9778 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9779 next_stmt_info = vect_orig_stmt (next_stmt_info);
9780 /* Free the attached stmt_vec_info and remove the stmt. */
9781 vinfo->remove_stmt (next_stmt_info);
9782 next_stmt_info = tmp;
9786 /* Function get_vectype_for_scalar_type_and_size.
9788 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9789 by the target. */
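   /* For example (a hedged sketch; actual availability depends on the target
      checks performed below): with SIZE of 16 bytes and a 4-byte integer
      SCALAR_TYPE, NUNITS is 16 / 4 = 4, so the function attempts to build
      and validate a 4-element integer vector type.  */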
9791 tree
9792 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9794 tree orig_scalar_type = scalar_type;
9795 scalar_mode inner_mode;
9796 machine_mode simd_mode;
9797 poly_uint64 nunits;
9798 tree vectype;
9800 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9801 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9802 return NULL_TREE;
9804 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9806 /* For vector types of elements whose mode precision doesn't
9807      match their type's precision we use an element type of mode
9808 precision. The vectorization routines will have to make sure
9809 they support the proper result truncation/extension.
9810 We also make sure to build vector types with INTEGER_TYPE
9811 component type only. */
9812 if (INTEGRAL_TYPE_P (scalar_type)
9813 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9814 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9815 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9816 TYPE_UNSIGNED (scalar_type));
9818 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9819 When the component mode passes the above test simply use a type
9820 corresponding to that mode. The theory is that any use that
9821 would cause problems with this will disable vectorization anyway. */
9822 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9823 && !INTEGRAL_TYPE_P (scalar_type))
9824 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9826 /* We can't build a vector type of elements with alignment bigger than
9827 their size. */
9828 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9829 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9830 TYPE_UNSIGNED (scalar_type));
9832   /* If we fell back to using the mode, fail if there was
9833 no scalar type for it. */
9834 if (scalar_type == NULL_TREE)
9835 return NULL_TREE;
9837 /* If no size was supplied use the mode the target prefers. Otherwise
9838      look up a vector mode of the specified size.  */
9839 if (known_eq (size, 0U))
9840 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9841 else if (!multiple_p (size, nbytes, &nunits)
9842 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9843 return NULL_TREE;
9844 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9845 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9846 return NULL_TREE;
9848 vectype = build_vector_type (scalar_type, nunits);
9850 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9851 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9852 return NULL_TREE;
9854 /* Re-attach the address-space qualifier if we canonicalized the scalar
9855 type. */
9856 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9857 return build_qualified_type
9858 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9860 return vectype;
9863 poly_uint64 current_vector_size;
9865 /* Function get_vectype_for_scalar_type.
9867 Returns the vector type corresponding to SCALAR_TYPE as supported
9868 by the target. */
9870 tree
9871 get_vectype_for_scalar_type (tree scalar_type)
9873 tree vectype;
9874 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9875 current_vector_size);
9876 if (vectype
9877 && known_eq (current_vector_size, 0U))
9878 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9879 return vectype;
9882 /* Function get_mask_type_for_scalar_type.
9884 Returns the mask type corresponding to a result of comparison
9885 of vectors of specified SCALAR_TYPE as supported by target. */
9887 tree
9888 get_mask_type_for_scalar_type (tree scalar_type)
9890 tree vectype = get_vectype_for_scalar_type (scalar_type);
9892 if (!vectype)
9893 return NULL;
9895 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9896 current_vector_size);
9899 /* Function get_same_sized_vectype
9901 Returns a vector type corresponding to SCALAR_TYPE of size
9902 VECTOR_TYPE if supported by the target. */
9904 tree
9905 get_same_sized_vectype (tree scalar_type, tree vector_type)
9907 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9908 return build_same_sized_truth_vector_type (vector_type);
9910 return get_vectype_for_scalar_type_and_size
9911 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9914 /* Function vect_is_simple_use.
9916 Input:
9917 VINFO - the vect info of the loop or basic block that is being vectorized.
9918 OPERAND - operand in the loop or bb.
9919 Output:
9920 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
9921 case OPERAND is an SSA_NAME that is defined in the vectorizable region
9922 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
9923 the definition could be anywhere in the function
9924 DT - the type of definition
9926 Returns whether a stmt with OPERAND can be vectorized.
9927 For loops, supportable operands are constants, loop invariants, and operands
9928 that are defined by the current iteration of the loop. Unsupportable
9929 operands are those that are defined by a previous iteration of the loop (as
9930 is the case in reduction/induction computations).
9931 For basic blocks, supportable operands are constants and bb invariants.
9932 For now, operands defined outside the basic block are not supported. */
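   /* As a worked illustration (a sketch derived from the classification
      code below): a constant operand yields vect_constant_def, a
      default-definition SSA_NAME or one with no stmt_vec_info yields
      vect_external_def, and an SSA_NAME defined by a PHI, assignment or
      call inside the vectorizable region yields the def type recorded on
      its (possibly pattern) defining statement.  */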
9934 bool
9935 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
9936 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
9938 if (def_stmt_info_out)
9939 *def_stmt_info_out = NULL;
9940 if (def_stmt_out)
9941 *def_stmt_out = NULL;
9942 *dt = vect_unknown_def_type;
9944 if (dump_enabled_p ())
9946 dump_printf_loc (MSG_NOTE, vect_location,
9947 "vect_is_simple_use: operand ");
9948 if (TREE_CODE (operand) == SSA_NAME
9949 && !SSA_NAME_IS_DEFAULT_DEF (operand))
9950 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
9951 else
9952 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9955 if (CONSTANT_CLASS_P (operand))
9956 *dt = vect_constant_def;
9957 else if (is_gimple_min_invariant (operand))
9958 *dt = vect_external_def;
9959 else if (TREE_CODE (operand) != SSA_NAME)
9960 *dt = vect_unknown_def_type;
9961 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
9962 *dt = vect_external_def;
9963 else
9965 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
9966 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
9967 if (!stmt_vinfo)
9968 *dt = vect_external_def;
9969 else
9971 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
9972 def_stmt = stmt_vinfo->stmt;
9973 switch (gimple_code (def_stmt))
9975 case GIMPLE_PHI:
9976 case GIMPLE_ASSIGN:
9977 case GIMPLE_CALL:
9978 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9979 break;
9980 default:
9981 *dt = vect_unknown_def_type;
9982 break;
9984 if (def_stmt_info_out)
9985 *def_stmt_info_out = stmt_vinfo;
9987 if (def_stmt_out)
9988 *def_stmt_out = def_stmt;
9991 if (dump_enabled_p ())
9993 dump_printf (MSG_NOTE, ", type of def: ");
9994 switch (*dt)
9996 case vect_uninitialized_def:
9997 dump_printf (MSG_NOTE, "uninitialized\n");
9998 break;
9999 case vect_constant_def:
10000 dump_printf (MSG_NOTE, "constant\n");
10001 break;
10002 case vect_external_def:
10003 dump_printf (MSG_NOTE, "external\n");
10004 break;
10005 case vect_internal_def:
10006 dump_printf (MSG_NOTE, "internal\n");
10007 break;
10008 case vect_induction_def:
10009 dump_printf (MSG_NOTE, "induction\n");
10010 break;
10011 case vect_reduction_def:
10012 dump_printf (MSG_NOTE, "reduction\n");
10013 break;
10014 case vect_double_reduction_def:
10015 dump_printf (MSG_NOTE, "double reduction\n");
10016 break;
10017 case vect_nested_cycle:
10018 dump_printf (MSG_NOTE, "nested cycle\n");
10019 break;
10020 case vect_unknown_def_type:
10021 dump_printf (MSG_NOTE, "unknown\n");
10022 break;
10026 if (*dt == vect_unknown_def_type)
10028 if (dump_enabled_p ())
10029 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10030 "Unsupported pattern.\n");
10031 return false;
10034 return true;
10037 /* Function vect_is_simple_use.
10039 Same as vect_is_simple_use but also determines the vector operand
10040 type of OPERAND and stores it to *VECTYPE. If the definition of
10041 OPERAND is vect_uninitialized_def, vect_constant_def or
10042 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10043    is responsible for computing the best suited vector type for the
10044 scalar operand. */
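   /* A minimal usage sketch (hedged; it mirrors the calls made elsewhere in
      this file, e.g. in vectorizable_comparison):

	enum vect_def_type dt;
	tree vectype = NULL_TREE;
	if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dt, &vectype))
	  return false;

      where VECTYPE is left as NULL_TREE for constant and external defs.  */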
10046 bool
10047 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10048 tree *vectype, stmt_vec_info *def_stmt_info_out,
10049 gimple **def_stmt_out)
10051 stmt_vec_info def_stmt_info;
10052 gimple *def_stmt;
10053 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10054 return false;
10056 if (def_stmt_out)
10057 *def_stmt_out = def_stmt;
10058 if (def_stmt_info_out)
10059 *def_stmt_info_out = def_stmt_info;
10061 /* Now get a vector type if the def is internal, otherwise supply
10062 NULL_TREE and leave it up to the caller to figure out a proper
10063 type for the use stmt. */
10064 if (*dt == vect_internal_def
10065 || *dt == vect_induction_def
10066 || *dt == vect_reduction_def
10067 || *dt == vect_double_reduction_def
10068 || *dt == vect_nested_cycle)
10070 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
10071 gcc_assert (*vectype != NULL_TREE);
10072 if (dump_enabled_p ())
10073 dump_printf_loc (MSG_NOTE, vect_location,
10074 "vect_is_simple_use: vectype %T\n", *vectype);
10076 else if (*dt == vect_uninitialized_def
10077 || *dt == vect_constant_def
10078 || *dt == vect_external_def)
10079 *vectype = NULL_TREE;
10080 else
10081 gcc_unreachable ();
10083 return true;
10087 /* Function supportable_widening_operation
10089 Check whether an operation represented by the code CODE is a
10090 widening operation that is supported by the target platform in
10091 vector form (i.e., when operating on arguments of type VECTYPE_IN
10092 producing a result of type VECTYPE_OUT).
10094 Widening operations we currently support are NOP (CONVERT), FLOAT,
10095 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10096 are supported by the target platform either directly (via vector
10097 tree-codes), or via target builtins.
10099 Output:
10100 - CODE1 and CODE2 are codes of vector operations to be used when
10101 vectorizing the operation, if available.
10102 - MULTI_STEP_CVT determines the number of required intermediate steps in
10103 case of multi-step conversion (like char->short->int - in that case
10104 MULTI_STEP_CVT will be 1).
10105 - INTERM_TYPES contains the intermediate type required to perform the
10106 widening operation (short in the above example). */
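   /* For the char->short->int example above (a sketch only; the exact codes
      depend on what the target provides and on endianness), a NOP conversion
      would return *CODE1 = VEC_UNPACK_LO_EXPR, *CODE2 = VEC_UNPACK_HI_EXPR,
      *MULTI_STEP_CVT = 1, and INTERM_TYPES holding the short vector type.  */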
10108 bool
10109 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
10110 tree vectype_out, tree vectype_in,
10111 enum tree_code *code1, enum tree_code *code2,
10112 int *multi_step_cvt,
10113 vec<tree> *interm_types)
10115 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10116 struct loop *vect_loop = NULL;
10117 machine_mode vec_mode;
10118 enum insn_code icode1, icode2;
10119 optab optab1, optab2;
10120 tree vectype = vectype_in;
10121 tree wide_vectype = vectype_out;
10122 enum tree_code c1, c2;
10123 int i;
10124 tree prev_type, intermediate_type;
10125 machine_mode intermediate_mode, prev_mode;
10126 optab optab3, optab4;
10128 *multi_step_cvt = 0;
10129 if (loop_info)
10130 vect_loop = LOOP_VINFO_LOOP (loop_info);
10132 switch (code)
10134 case WIDEN_MULT_EXPR:
10135 /* The result of a vectorized widening operation usually requires
10136 two vectors (because the widened results do not fit into one vector).
10137 The generated vector results would normally be expected to be
10138 generated in the same order as in the original scalar computation,
10139 i.e. if 8 results are generated in each vector iteration, they are
10140 to be organized as follows:
10141 vect1: [res1,res2,res3,res4],
10142 vect2: [res5,res6,res7,res8].
10144 However, in the special case that the result of the widening
10145 operation is used in a reduction computation only, the order doesn't
10146 matter (because when vectorizing a reduction we change the order of
10147 the computation). Some targets can take advantage of this and
10148 generate more efficient code. For example, targets like Altivec,
10149 that support widen_mult using a sequence of {mult_even,mult_odd}
10150 generate the following vectors:
10151 vect1: [res1,res3,res5,res7],
10152 vect2: [res2,res4,res6,res8].
10154 When vectorizing outer-loops, we execute the inner-loop sequentially
10155 (each vectorized inner-loop iteration contributes to VF outer-loop
10156         iterations in parallel).  We therefore don't allow changing the
10157 order of the computation in the inner-loop during outer-loop
10158 vectorization. */
10159 /* TODO: Another case in which order doesn't *really* matter is when we
10160 widen and then contract again, e.g. (short)((int)x * y >> 8).
10161 Normally, pack_trunc performs an even/odd permute, whereas the
10162 repack from an even/odd expansion would be an interleave, which
10163 would be significantly simpler for e.g. AVX2. */
10164 /* In any case, in order to avoid duplicating the code below, recurse
10165 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10166 are properly set up for the caller. If we fail, we'll continue with
10167 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10168 if (vect_loop
10169 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10170 && !nested_in_vect_loop_p (vect_loop, stmt_info)
10171 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10172 stmt_info, vectype_out,
10173 vectype_in, code1, code2,
10174 multi_step_cvt, interm_types))
10176 /* Elements in a vector with vect_used_by_reduction property cannot
10177 be reordered if the use chain with this property does not have the
10178             same operation.  One such example is s += a * b, where elements
10179 in a and b cannot be reordered. Here we check if the vector defined
10180 by STMT is only directly used in the reduction statement. */
10181 tree lhs = gimple_assign_lhs (stmt_info->stmt);
10182 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10183 if (use_stmt_info
10184 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10185 return true;
10187 c1 = VEC_WIDEN_MULT_LO_EXPR;
10188 c2 = VEC_WIDEN_MULT_HI_EXPR;
10189 break;
10191 case DOT_PROD_EXPR:
10192 c1 = DOT_PROD_EXPR;
10193 c2 = DOT_PROD_EXPR;
10194 break;
10196 case SAD_EXPR:
10197 c1 = SAD_EXPR;
10198 c2 = SAD_EXPR;
10199 break;
10201 case VEC_WIDEN_MULT_EVEN_EXPR:
10202 /* Support the recursion induced just above. */
10203 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10204 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10205 break;
10207 case WIDEN_LSHIFT_EXPR:
10208 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10209 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10210 break;
10212 CASE_CONVERT:
10213 c1 = VEC_UNPACK_LO_EXPR;
10214 c2 = VEC_UNPACK_HI_EXPR;
10215 break;
10217 case FLOAT_EXPR:
10218 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10219 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10220 break;
10222 case FIX_TRUNC_EXPR:
10223 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10224 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10225 break;
10227 default:
10228 gcc_unreachable ();
10231 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10232 std::swap (c1, c2);
10234 if (code == FIX_TRUNC_EXPR)
10236 /* The signedness is determined from output operand. */
10237 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10238 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10240 else
10242 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10243 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10246 if (!optab1 || !optab2)
10247 return false;
10249 vec_mode = TYPE_MODE (vectype);
10250 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10251 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10252 return false;
10254 *code1 = c1;
10255 *code2 = c2;
10257 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10258 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10259 /* For scalar masks we may have different boolean
10260 vector types having the same QImode. Thus we
10261        add an additional check for the number of elements.  */
10262 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10263 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10264 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10266 /* Check if it's a multi-step conversion that can be done using intermediate
10267 types. */
10269 prev_type = vectype;
10270 prev_mode = vec_mode;
10272 if (!CONVERT_EXPR_CODE_P (code))
10273 return false;
10275 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10276      intermediate steps in the promotion sequence.  We try
10277      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10278 not. */
10279 interm_types->create (MAX_INTERM_CVT_STEPS);
10280 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10282 intermediate_mode = insn_data[icode1].operand[0].mode;
10283 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10285 intermediate_type = vect_halve_mask_nunits (prev_type);
10286 if (intermediate_mode != TYPE_MODE (intermediate_type))
10287 return false;
10289 else
10290 intermediate_type
10291 = lang_hooks.types.type_for_mode (intermediate_mode,
10292 TYPE_UNSIGNED (prev_type));
10294 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10295 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10297 if (!optab3 || !optab4
10298 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10299 || insn_data[icode1].operand[0].mode != intermediate_mode
10300 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10301 || insn_data[icode2].operand[0].mode != intermediate_mode
10302 || ((icode1 = optab_handler (optab3, intermediate_mode))
10303 == CODE_FOR_nothing)
10304 || ((icode2 = optab_handler (optab4, intermediate_mode))
10305 == CODE_FOR_nothing))
10306 break;
10308 interm_types->quick_push (intermediate_type);
10309 (*multi_step_cvt)++;
10311 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10312 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10313 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10314 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10315 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10317 prev_type = intermediate_type;
10318 prev_mode = intermediate_mode;
10321 interm_types->release ();
10322 return false;
10326 /* Function supportable_narrowing_operation
10328 Check whether an operation represented by the code CODE is a
10329 narrowing operation that is supported by the target platform in
10330 vector form (i.e., when operating on arguments of type VECTYPE_IN
10331 and producing a result of type VECTYPE_OUT).
10333 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10334 and FLOAT. This function checks if these operations are supported by
10335 the target platform directly via vector tree-codes.
10337 Output:
10338 - CODE1 is the code of a vector operation to be used when
10339 vectorizing the operation, if available.
10340 - MULTI_STEP_CVT determines the number of required intermediate steps in
10341 case of multi-step conversion (like int->short->char - in that case
10342 MULTI_STEP_CVT will be 1).
10343 - INTERM_TYPES contains the intermediate type required to perform the
10344 narrowing operation (short in the above example). */
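   /* For the int->short->char example above (a sketch only; subject to the
      target checks performed below), a NOP conversion would return
      *CODE1 = VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT = 1, and INTERM_TYPES
      holding the short vector type.  */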
10346 bool
10347 supportable_narrowing_operation (enum tree_code code,
10348 tree vectype_out, tree vectype_in,
10349 enum tree_code *code1, int *multi_step_cvt,
10350 vec<tree> *interm_types)
10352 machine_mode vec_mode;
10353 enum insn_code icode1;
10354 optab optab1, interm_optab;
10355 tree vectype = vectype_in;
10356 tree narrow_vectype = vectype_out;
10357 enum tree_code c1;
10358 tree intermediate_type, prev_type;
10359 machine_mode intermediate_mode, prev_mode;
10360 int i;
10361 bool uns;
10363 *multi_step_cvt = 0;
10364 switch (code)
10366 CASE_CONVERT:
10367 c1 = VEC_PACK_TRUNC_EXPR;
10368 break;
10370 case FIX_TRUNC_EXPR:
10371 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10372 break;
10374 case FLOAT_EXPR:
10375 c1 = VEC_PACK_FLOAT_EXPR;
10376 break;
10378 default:
10379 gcc_unreachable ();
10382 if (code == FIX_TRUNC_EXPR)
10383 /* The signedness is determined from output operand. */
10384 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10385 else
10386 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10388 if (!optab1)
10389 return false;
10391 vec_mode = TYPE_MODE (vectype);
10392 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10393 return false;
10395 *code1 = c1;
10397 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10398 /* For scalar masks we may have different boolean
10399 vector types having the same QImode. Thus we
10400        add an additional check for the number of elements.  */
10401 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10402 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10403 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10405 if (code == FLOAT_EXPR)
10406 return false;
10408 /* Check if it's a multi-step conversion that can be done using intermediate
10409 types. */
10410 prev_mode = vec_mode;
10411 prev_type = vectype;
10412 if (code == FIX_TRUNC_EXPR)
10413 uns = TYPE_UNSIGNED (vectype_out);
10414 else
10415 uns = TYPE_UNSIGNED (vectype);
10417 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10418 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10419 costly than signed. */
10420 if (code == FIX_TRUNC_EXPR && uns)
10422 enum insn_code icode2;
10424 intermediate_type
10425 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10426 interm_optab
10427 = optab_for_tree_code (c1, intermediate_type, optab_default);
10428 if (interm_optab != unknown_optab
10429 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10430 && insn_data[icode1].operand[0].mode
10431 == insn_data[icode2].operand[0].mode)
10433 uns = false;
10434 optab1 = interm_optab;
10435 icode1 = icode2;
10439 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10440      intermediate steps in the narrowing sequence.  We try
10441 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10442 interm_types->create (MAX_INTERM_CVT_STEPS);
10443 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10445 intermediate_mode = insn_data[icode1].operand[0].mode;
10446 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10448 intermediate_type = vect_double_mask_nunits (prev_type);
10449 if (intermediate_mode != TYPE_MODE (intermediate_type))
10450 return false;
10452 else
10453 intermediate_type
10454 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10455 interm_optab
10456 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10457 optab_default);
10458 if (!interm_optab
10459 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10460 || insn_data[icode1].operand[0].mode != intermediate_mode
10461 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10462 == CODE_FOR_nothing))
10463 break;
10465 interm_types->quick_push (intermediate_type);
10466 (*multi_step_cvt)++;
10468 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10469 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10470 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10471 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10473 prev_mode = intermediate_mode;
10474 prev_type = intermediate_type;
10475 optab1 = interm_optab;
10478 interm_types->release ();
10479 return false;
10482 /* Generate and return a statement that sets vector mask MASK such that
10483 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
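/* For example (a hedged sketch): with START_INDEX 6, END_INDEX 9 and an
   8-element MASK, element I is active iff I + 6 < 9, so the generated
   IFN_WHILE_ULT call sets MASK to {1, 1, 1, 0, 0, 0, 0, 0}.  */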
10485 gcall *
10486 vect_gen_while (tree mask, tree start_index, tree end_index)
10488 tree cmp_type = TREE_TYPE (start_index);
10489 tree mask_type = TREE_TYPE (mask);
10490 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10491 cmp_type, mask_type,
10492 OPTIMIZE_FOR_SPEED));
10493 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10494 start_index, end_index,
10495 build_zero_cst (mask_type));
10496 gimple_call_set_lhs (call, mask);
10497 return call;
10500 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10501 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10503 tree
10504 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10505 tree end_index)
10507 tree tmp = make_ssa_name (mask_type);
10508 gcall *call = vect_gen_while (tmp, start_index, end_index);
10509 gimple_seq_add_stmt (seq, call);
10510 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10513 /* Try to compute the vector types required to vectorize STMT_INFO,
10514 returning true on success and false if vectorization isn't possible.
10516 On success:
10518 - Set *STMT_VECTYPE_OUT to:
10519 - NULL_TREE if the statement doesn't need to be vectorized;
10520 - boolean_type_node if the statement is a boolean operation whose
10521 vector type can only be determined once all the other vector types
10522 are known; and
10523 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10525 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10526 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10527 statement does not help to determine the overall number of units. */
10529 opt_result
10530 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10531 tree *stmt_vectype_out,
10532 tree *nunits_vectype_out)
10534 gimple *stmt = stmt_info->stmt;
10536 *stmt_vectype_out = NULL_TREE;
10537 *nunits_vectype_out = NULL_TREE;
10539 if (gimple_get_lhs (stmt) == NULL_TREE
10540 /* MASK_STORE has no lhs, but is ok. */
10541 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10543 if (is_a <gcall *> (stmt))
10545 /* Ignore calls with no lhs. These must be calls to
10546 #pragma omp simd functions, and what vectorization factor
10547 it really needs can't be determined until
10548 vectorizable_simd_clone_call. */
10549 if (dump_enabled_p ())
10550 dump_printf_loc (MSG_NOTE, vect_location,
10551 "defer to SIMD clone analysis.\n");
10552 return opt_result::success ();
10555 return opt_result::failure_at (stmt,
10556 "not vectorized: irregular stmt.%G", stmt);
10559 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10560 return opt_result::failure_at (stmt,
10561 "not vectorized: vector stmt in loop:%G",
10562 stmt);
10564 tree vectype;
10565 tree scalar_type = NULL_TREE;
10566 if (STMT_VINFO_VECTYPE (stmt_info))
10567 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10568 else
10570 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10571 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10572 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10573 else
10574 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10576 /* Pure bool ops don't participate in number-of-units computation.
10577 For comparisons use the types being compared. */
10578 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10579 && is_gimple_assign (stmt)
10580 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10582 *stmt_vectype_out = boolean_type_node;
10584 tree rhs1 = gimple_assign_rhs1 (stmt);
10585 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10586 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10587 scalar_type = TREE_TYPE (rhs1);
10588 else
10590 if (dump_enabled_p ())
10591 dump_printf_loc (MSG_NOTE, vect_location,
10592 "pure bool operation.\n");
10593 return opt_result::success ();
10597 if (dump_enabled_p ())
10598 dump_printf_loc (MSG_NOTE, vect_location,
10599 "get vectype for scalar type: %T\n", scalar_type);
10600 vectype = get_vectype_for_scalar_type (scalar_type);
10601 if (!vectype)
10602 return opt_result::failure_at (stmt,
10603 "not vectorized:"
10604 " unsupported data-type %T\n",
10605 scalar_type);
10607 if (!*stmt_vectype_out)
10608 *stmt_vectype_out = vectype;
10610 if (dump_enabled_p ())
10611 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
10614 /* Don't try to compute scalar types if the stmt produces a boolean
10615 vector; use the existing vector type instead. */
10616 tree nunits_vectype;
10617 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10618 nunits_vectype = vectype;
10619 else
10621 /* The number of units is set according to the smallest scalar
10622 type (or the largest vector size, but we only support one
10623 vector size per vectorization). */
10624 if (*stmt_vectype_out != boolean_type_node)
10626 HOST_WIDE_INT dummy;
10627 scalar_type = vect_get_smallest_scalar_type (stmt_info,
10628 &dummy, &dummy);
10630 if (dump_enabled_p ())
10631 dump_printf_loc (MSG_NOTE, vect_location,
10632 "get vectype for scalar type: %T\n", scalar_type);
10633 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10635 if (!nunits_vectype)
10636 return opt_result::failure_at (stmt,
10637 "not vectorized: unsupported data-type %T\n",
10638 scalar_type);
10640 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10641 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10642 return opt_result::failure_at (stmt,
10643 "not vectorized: different sized vector "
10644 "types in statement, %T and %T\n",
10645 vectype, nunits_vectype);
10647 if (dump_enabled_p ())
10649 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
10650 nunits_vectype);
10652 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10653 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10654 dump_printf (MSG_NOTE, "\n");
10657 *nunits_vectype_out = nunits_vectype;
10658 return opt_result::success ();
10661 /* Try to determine the correct vector type for STMT_INFO, which is a
10662 statement that produces a scalar boolean result. Return the vector
10663 type on success, otherwise return NULL_TREE. */
10665 opt_tree
10666 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10668 gimple *stmt = stmt_info->stmt;
10669 tree mask_type = NULL;
10670 tree vectype, scalar_type;
10672 if (is_gimple_assign (stmt)
10673 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10674 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10676 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10677 mask_type = get_mask_type_for_scalar_type (scalar_type);
10679 if (!mask_type)
10680 return opt_tree::failure_at (stmt,
10681 "not vectorized: unsupported mask\n");
10683 else
10685 tree rhs;
10686 ssa_op_iter iter;
10687 enum vect_def_type dt;
10689 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10691 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10692 return opt_tree::failure_at (stmt,
10693 "not vectorized:can't compute mask"
10694 " type for statement, %G", stmt);
10696 /* No vectype probably means external definition.
10697 Allow it in case there is another operand which
10698        allows the mask type to be determined.  */
10699 if (!vectype)
10700 continue;
10702 if (!mask_type)
10703 mask_type = vectype;
10704 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10705 TYPE_VECTOR_SUBPARTS (vectype)))
10706 return opt_tree::failure_at (stmt,
10707 "not vectorized: different sized mask"
10708 " types in statement, %T and %T\n",
10709 mask_type, vectype);
10710 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10711 != VECTOR_BOOLEAN_TYPE_P (vectype))
10712 return opt_tree::failure_at (stmt,
10713 "not vectorized: mixed mask and "
10714 "nonmask vector types in statement, "
10715 "%T and %T\n",
10716 mask_type, vectype);
10719       /* We may compare a boolean value loaded as a vector of integers.
10720          Fix mask_type in such a case.  */
10721 if (mask_type
10722 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10723 && gimple_code (stmt) == GIMPLE_ASSIGN
10724 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10725 mask_type = build_same_sized_truth_vector_type (mask_type);
10728   /* No mask_type should mean a loop-invariant predicate.
10729 This is probably a subject for optimization in if-conversion. */
10730 if (!mask_type)
10731 return opt_tree::failure_at (stmt,
10732 "not vectorized: can't compute mask type "
10733 "for statement: %G", stmt);
10735 return opt_tree::success (mask_type);