1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
101 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
102 body_cost_vec->safe_push (si);
104 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
109 /* Return a variable of type ELEM_TYPE[NELEMS]. */
111 static tree
112 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
114 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
115 "vect_array");
118 /* ARRAY is an array of vectors created by create_vector_array.
119 Return an SSA_NAME for the vector in index N. The reference
120 is part of the vectorization of STMT_INFO and the vector is associated
121 with scalar destination SCALAR_DEST. */
123 static tree
124 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
125 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
127 tree vect_type, vect, vect_name, array_ref;
128 gimple *new_stmt;
130 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
131 vect_type = TREE_TYPE (TREE_TYPE (array));
132 vect = vect_create_destination_var (scalar_dest, vect_type);
133 array_ref = build4 (ARRAY_REF, vect_type, array,
134 build_int_cst (size_type_node, n),
135 NULL_TREE, NULL_TREE);
137 new_stmt = gimple_build_assign (vect, array_ref);
138 vect_name = make_ssa_name (vect, new_stmt);
139 gimple_assign_set_lhs (new_stmt, vect_name);
140 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
142 return vect_name;
145 /* ARRAY is an array of vectors created by create_vector_array.
146 Emit code to store SSA_NAME VECT in index N of the array.
147 The store is part of the vectorization of STMT_INFO. */
149 static void
150 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
151 tree vect, tree array, unsigned HOST_WIDE_INT n)
153 tree array_ref;
154 gimple *new_stmt;
156 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
157 build_int_cst (size_type_node, n),
158 NULL_TREE, NULL_TREE);
160 new_stmt = gimple_build_assign (array_ref, vect);
161 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
164 /* PTR is a pointer to an array of type TYPE. Return a representation
165 of *PTR. The memory reference replaces those in FIRST_DR
166 (and its group). */
168 static tree
169 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
171 tree mem_ref;
173 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
174 /* Arrays have the same alignment as their type. */
175 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
176 return mem_ref;
179 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
180 Emit the clobber before *GSI. */
182 static void
183 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
184 tree var)
186 tree clobber = build_clobber (TREE_TYPE (var));
187 gimple *new_stmt = gimple_build_assign (var, clobber);
188 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193 /* Function vect_mark_relevant.
195 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
197 static void
198 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
199 enum vect_relevant relevant, bool live_p)
201 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
202 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
204 if (dump_enabled_p ())
205 dump_printf_loc (MSG_NOTE, vect_location,
206 "mark relevant %d, live %d: %G", relevant, live_p,
207 stmt_info->stmt);
209 /* If this stmt is an original stmt in a pattern, we might need to mark its
210 related pattern stmt instead of the original stmt. However, such stmts
211 may have their own uses that are not in any pattern; in such cases the
212 stmt itself should be marked. */
213 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
215 /* This is the last stmt in a sequence that was detected as a
216 pattern that can potentially be vectorized. Don't mark the stmt
217 as relevant/live because it's not going to be vectorized.
218 Instead mark the pattern-stmt that replaces it. */
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE, vect_location,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_vec_info old_stmt_info = stmt_info;
225 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
226 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
227 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
228 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
231 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233 STMT_VINFO_RELEVANT (stmt_info) = relevant;
235 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE, vect_location,
240 "already marked relevant/live.\n");
241 return;
244 worklist->safe_push (stmt_info);
248 /* Function is_simple_and_all_uses_invariant
250 Return true if STMT_INFO is simple and all uses of it are invariant. */
252 bool
253 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
254 loop_vec_info loop_vinfo)
256 tree op;
257 ssa_op_iter iter;
259 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
260 if (!stmt)
261 return false;
263 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
265 enum vect_def_type dt = vect_uninitialized_def;
267 if (!vect_is_simple_use (op, loop_vinfo, &dt))
269 if (dump_enabled_p ())
270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
271 "use not simple.\n");
272 return false;
275 if (dt != vect_external_def && dt != vect_constant_def)
276 return false;
278 return true;
281 /* Function vect_stmt_relevant_p.
283 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
284 is "relevant for vectorization".
286 A stmt is considered "relevant for vectorization" if:
287 - it has uses outside the loop.
288 - it has vdefs (it alters memory).
289 - control stmts in the loop (except for the exit condition).
291 CHECKME: what other side effects would the vectorizer allow? */
293 static bool
294 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
295 enum vect_relevant *relevant, bool *live_p)
297 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
298 ssa_op_iter op_iter;
299 imm_use_iterator imm_iter;
300 use_operand_p use_p;
301 def_operand_p def_p;
303 *relevant = vect_unused_in_scope;
304 *live_p = false;
306 /* cond stmt other than loop exit cond. */
307 if (is_ctrl_stmt (stmt_info->stmt)
308 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
309 *relevant = vect_used_in_scope;
311 /* changing memory. */
312 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
313 if (gimple_vdef (stmt_info->stmt)
314 && !gimple_clobber_p (stmt_info->stmt))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE, vect_location,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant = vect_used_in_scope;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop-closed SSA form) */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
342 *live_p = true;
347 if (*live_p && *relevant == vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE, vect_location,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant = vect_used_only_live;
356 return (*live_p || *relevant);
360 /* Function exist_non_indexing_operands_for_use_p
362 USE is one of the uses attached to STMT_INFO. Check if USE is
363 used in STMT_INFO for anything other than indexing an array. */
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
368 tree operand;
370 /* USE corresponds to some operand in STMT. If there is no data
371 reference in STMT, then any operand that corresponds to USE
372 is not indexing an array. */
373 if (!STMT_VINFO_DATA_REF (stmt_info))
374 return true;
376 /* STMT has a data_ref. FORNOW this means that it's one of
377 the following forms:
378 -1- ARRAY_REF = var
379 -2- var = ARRAY_REF
380 (This should have been verified in analyze_data_refs).
382 'var' in the second case corresponds to a def, not a use,
383 so USE cannot correspond to any operands that are not used
384 for array indexing.
386 Therefore, all we need to check is if STMT falls into the
387 first case, and whether var corresponds to USE. */
389 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
390 if (!assign || !gimple_assign_copy_p (assign))
392 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
393 if (call && gimple_call_internal_p (call))
395 internal_fn ifn = gimple_call_internal_fn (call);
396 int mask_index = internal_fn_mask_index (ifn);
397 if (mask_index >= 0
398 && use == gimple_call_arg (call, mask_index))
399 return true;
400 int stored_value_index = internal_fn_stored_value_index (ifn);
401 if (stored_value_index >= 0
402 && use == gimple_call_arg (call, stored_value_index))
403 return true;
404 if (internal_gather_scatter_fn_p (ifn)
405 && use == gimple_call_arg (call, 1))
406 return true;
408 return false;
411 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
412 return false;
413 operand = gimple_assign_rhs1 (assign);
414 if (TREE_CODE (operand) != SSA_NAME)
415 return false;
417 if (operand == use)
418 return true;
420 return false;
425 Function process_use.
427 Inputs:
428 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
429 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430 that defined USE. This is done by calling mark_relevant and passing it
431 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
433 be performed.
435 Outputs:
436 Generally, LIVE_P and RELEVANT are used to define the liveness and
437 relevance info of the DEF_STMT of this USE:
438 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
439 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
440 Exceptions:
441 - case 1: If USE is used only for address computations (e.g. array indexing),
442 which does not need to be directly vectorized, then the liveness/relevance
443 of the respective DEF_STMT is left unchanged.
444 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
445 we skip DEF_STMT because it has already been processed.
446 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
447 "relevant" will be modified accordingly.
449 Return true if everything is as expected. Return false otherwise. */
451 static opt_result
452 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
453 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
454 bool force)
456 stmt_vec_info dstmt_vinfo;
457 basic_block bb, def_bb;
458 enum vect_def_type dt;
460 /* case 1: we are only interested in uses that need to be vectorized. Uses
461 that are used for address computation are not considered relevant. */
462 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
463 return opt_result::success ();
465 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
466 return opt_result::failure_at (stmt_vinfo->stmt,
467 "not vectorized:"
468 " unsupported use in stmt.\n");
470 if (!dstmt_vinfo)
471 return opt_result::success ();
473 def_bb = gimple_bb (dstmt_vinfo->stmt);
475 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
476 DSTMT_VINFO must have already been processed, because this should be the
477 only way that STMT, which is a reduction-phi, was put in the worklist,
478 as there should be no other uses for DSTMT_VINFO in the loop. So we just
479 check that everything is as expected, and we are done. */
480 bb = gimple_bb (stmt_vinfo->stmt);
481 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
483 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
485 && bb->loop_father == def_bb->loop_father)
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
490 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
491 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
492 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
610 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location,
642 "init: stmt relevant? %G", stmt_info->stmt);
644 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
652 use_operand_p use_p;
653 ssa_op_iter iter;
655 stmt_vec_info stmt_vinfo = worklist.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location,
658 "worklist: examine stmt: %G", stmt_vinfo->stmt);
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
662 of STMT. */
663 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
678 case vect_reduction_def:
679 gcc_assert (relevant != vect_unused_in_scope);
680 if (relevant != vect_unused_in_scope
681 && relevant != vect_used_in_scope
682 && relevant != vect_used_by_reduction
683 && relevant != vect_used_only_live)
684 return opt_result::failure_at
685 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
686 break;
688 case vect_nested_cycle:
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_outer_by_reduction
691 && relevant != vect_used_in_outer)
692 return opt_result::failure_at
693 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
694 break;
696 case vect_double_reduction_def:
697 if (relevant != vect_unused_in_scope
698 && relevant != vect_used_by_reduction
699 && relevant != vect_used_only_live)
700 return opt_result::failure_at
701 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
702 break;
704 default:
705 break;
708 if (is_pattern_stmt_p (stmt_vinfo))
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
715 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
716 tree op = gimple_assign_rhs1 (assign);
718 i = 1;
719 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
721 opt_result res
722 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
723 loop_vinfo, relevant, &worklist, false);
724 if (!res)
725 return res;
726 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
727 loop_vinfo, relevant, &worklist, false);
728 if (!res)
729 return res;
730 i = 2;
732 for (; i < gimple_num_ops (assign); i++)
734 op = gimple_op (assign, i);
735 if (TREE_CODE (op) == SSA_NAME)
737 opt_result res
738 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
739 &worklist, false);
740 if (!res)
741 return res;
745 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
747 for (i = 0; i < gimple_call_num_args (call); i++)
749 tree arg = gimple_call_arg (call, i);
750 opt_result res
751 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
752 &worklist, false);
753 if (!res)
754 return res;
758 else
759 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
761 tree op = USE_FROM_PTR (use_p);
762 opt_result res
763 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
764 &worklist, false);
765 if (!res)
766 return res;
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
771 gather_scatter_info gs_info;
772 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
773 gcc_unreachable ();
774 opt_result res
775 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
776 &worklist, true);
777 if (!res)
778 return res;
780 } /* while worklist */
782 return opt_result::success ();
785 /* Compute the prologue cost for invariant or constant operands. */
787 static unsigned
788 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
789 unsigned opno, enum vect_def_type dt,
790 stmt_vector_for_cost *cost_vec)
792 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
793 tree op = gimple_op (stmt, opno);
794 unsigned prologue_cost = 0;
796 /* Without looking at the actual initializer a vector of
797 constants can be implemented as a load from the constant pool.
798 When all elements are the same we can use a splat. */
799 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
800 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
801 unsigned num_vects_to_check;
802 unsigned HOST_WIDE_INT const_nunits;
803 unsigned nelt_limit;
804 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
805 && ! multiple_p (const_nunits, group_size))
807 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
808 nelt_limit = const_nunits;
810 else
812 /* If either the vector has variable length or the vectors
813 are composed of repeated whole groups, we only need to
814 cost construction once. All vectors will be the same. */
815 num_vects_to_check = 1;
816 nelt_limit = group_size;
818 tree elt = NULL_TREE;
819 unsigned nelt = 0;
820 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
822 unsigned si = j % group_size;
823 if (nelt == 0)
824 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
825 /* ??? We're just tracking whether all operands of a single
826 vector initializer are the same; ideally we'd check if
827 we emitted the same one already. */
828 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
829 opno))
830 elt = NULL_TREE;
831 nelt++;
832 if (nelt == nelt_limit)
834 /* ??? We need to pass down stmt_info for a vector type
835 even if it points to the wrong stmt. */
836 prologue_cost += record_stmt_cost
837 (cost_vec, 1,
838 dt == vect_external_def
839 ? (elt ? scalar_to_vec : vec_construct)
840 : vector_load,
841 stmt_info, 0, vect_prologue);
842 nelt = 0;
846 return prologue_cost;
849 /* Function vect_model_simple_cost.
851 Models cost for simple operations, i.e. those that only emit ncopies of a
852 single op. Right now, this does not account for multiple insns that could
853 be generated for the single vector op. We will handle that shortly. */
855 static void
856 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
857 enum vect_def_type *dt,
858 int ndts,
859 slp_tree node,
860 stmt_vector_for_cost *cost_vec)
862 int inside_cost = 0, prologue_cost = 0;
864 gcc_assert (cost_vec != NULL);
866 /* ??? Somehow we need to fix this at the callers. */
867 if (node)
868 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
870 if (node)
872 /* Scan operands and account for prologue cost of constants/externals.
873 ??? This over-estimates cost for multiple uses and should be
874 re-engineered. */
875 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
876 tree lhs = gimple_get_lhs (stmt);
877 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
879 tree op = gimple_op (stmt, i);
880 enum vect_def_type dt;
881 if (!op || op == lhs)
882 continue;
883 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
884 && (dt == vect_constant_def || dt == vect_external_def))
885 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
886 i, dt, cost_vec);
889 else
890 /* Cost the "broadcast" of a scalar operand into a vector operand.
891 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
892 cost model. */
893 for (int i = 0; i < ndts; i++)
894 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
895 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
896 stmt_info, 0, vect_prologue);
898 /* Adjust for two-operator SLP nodes. */
899 if (node && SLP_TREE_TWO_OPERATORS (node))
901 ncopies *= 2;
902 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
903 stmt_info, 0, vect_body);
906 /* Pass the inside-of-loop statements to the target-specific cost model. */
907 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
908 stmt_info, 0, vect_body);
910 if (dump_enabled_p ())
911 dump_printf_loc (MSG_NOTE, vect_location,
912 "vect_model_simple_cost: inside_cost = %d, "
913 "prologue_cost = %d .\n", inside_cost, prologue_cost);
917 /* Model cost for type demotion and promotion operations. PWR is normally
918 zero for single-step promotions and demotions. It will be one if
919 two-step promotion/demotion is required, and so on. Each additional
920 step doubles the number of instructions required. */
922 static void
923 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
924 enum vect_def_type *dt, int pwr,
925 stmt_vector_for_cost *cost_vec)
927 int i, tmp;
928 int inside_cost = 0, prologue_cost = 0;
930 for (i = 0; i < pwr + 1; i++)
932 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
933 (i + 1) : i;
934 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
935 vec_promote_demote, stmt_info, 0,
936 vect_body);
939 /* FORNOW: Assuming a maximum of 2 args per stmt. */
940 for (i = 0; i < 2; i++)
941 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
942 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
943 stmt_info, 0, vect_prologue);
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE, vect_location,
947 "vect_model_promotion_demotion_cost: inside_cost = %d, "
948 "prologue_cost = %d .\n", inside_cost, prologue_cost);
951 /* Function vect_model_store_cost
953 Models cost for stores. In the case of grouped accesses, one access
954 has the overhead of the grouped access attributed to it. */
956 static void
957 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
958 enum vect_def_type dt,
959 vect_memory_access_type memory_access_type,
960 vec_load_store_type vls_type, slp_tree slp_node,
961 stmt_vector_for_cost *cost_vec)
963 unsigned int inside_cost = 0, prologue_cost = 0;
964 stmt_vec_info first_stmt_info = stmt_info;
965 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
967 /* ??? Somehow we need to fix this at the callers. */
968 if (slp_node)
969 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
971 if (vls_type == VLS_STORE_INVARIANT)
973 if (slp_node)
974 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
975 1, dt, cost_vec);
976 else
977 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
978 stmt_info, 0, vect_prologue);
981 /* Grouped stores update all elements in the group at once,
982 so we want the DR for the first statement. */
983 if (!slp_node && grouped_access_p)
984 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
986 /* True if we should include any once-per-group costs as well as
987 the cost of the statement itself. For SLP we only get called
988 once per group anyhow. */
989 bool first_stmt_p = (first_stmt_info == stmt_info);
991 /* We assume that the cost of a single store-lanes instruction is
992 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
993 access is instead being provided by a permute-and-store operation,
994 include the cost of the permutes. */
995 if (first_stmt_p
996 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
998 /* Uses high and low interleave or shuffle operations for each
999 needed permute. */
1000 int group_size = DR_GROUP_SIZE (first_stmt_info);
1001 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1002 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1003 stmt_info, 0, vect_body);
1005 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_NOTE, vect_location,
1007 "vect_model_store_cost: strided group_size = %d .\n",
1008 group_size);
1011 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1012 /* Costs of the stores. */
1013 if (memory_access_type == VMAT_ELEMENTWISE
1014 || memory_access_type == VMAT_GATHER_SCATTER)
1016 /* N scalar stores plus extracting the elements. */
1017 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1018 inside_cost += record_stmt_cost (cost_vec,
1019 ncopies * assumed_nunits,
1020 scalar_store, stmt_info, 0, vect_body);
1022 else
1023 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1025 if (memory_access_type == VMAT_ELEMENTWISE
1026 || memory_access_type == VMAT_STRIDED_SLP)
1028 /* N scalar stores plus extracting the elements. */
1029 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1030 inside_cost += record_stmt_cost (cost_vec,
1031 ncopies * assumed_nunits,
1032 vec_to_scalar, stmt_info, 0, vect_body);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_store_cost: inside_cost = %d, "
1038 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1042 /* Calculate cost of DR's memory access. */
1043 void
1044 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1045 unsigned int *inside_cost,
1046 stmt_vector_for_cost *body_cost_vec)
1048 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1049 int alignment_support_scheme
1050 = vect_supportable_dr_alignment (dr_info, false);
1052 switch (alignment_support_scheme)
1054 case dr_aligned:
1056 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1057 vector_store, stmt_info, 0,
1058 vect_body);
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_NOTE, vect_location,
1062 "vect_model_store_cost: aligned.\n");
1063 break;
1066 case dr_unaligned_supported:
1068 /* Here, we assign an additional cost for the unaligned store. */
1069 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1070 unaligned_store, stmt_info,
1071 DR_MISALIGNMENT (dr_info),
1072 vect_body);
1073 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_NOTE, vect_location,
1075 "vect_model_store_cost: unaligned supported by "
1076 "hardware.\n");
1077 break;
1080 case dr_unaligned_unsupported:
1082 *inside_cost = VECT_MAX_COST;
1084 if (dump_enabled_p ())
1085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1086 "vect_model_store_cost: unsupported access.\n");
1087 break;
1090 default:
1091 gcc_unreachable ();
1096 /* Function vect_model_load_cost
1098 Models cost for loads. In the case of grouped accesses, one access has
1099 the overhead of the grouped access attributed to it. Since unaligned
1100 accesses are supported for loads, we also account for the costs of the
1101 access scheme chosen. */
1103 static void
1104 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1105 vect_memory_access_type memory_access_type,
1106 slp_instance instance,
1107 slp_tree slp_node,
1108 stmt_vector_for_cost *cost_vec)
1110 unsigned int inside_cost = 0, prologue_cost = 0;
1111 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1113 gcc_assert (cost_vec);
1115 /* ??? Somehow we need to fix this at the callers. */
1116 if (slp_node)
1117 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1119 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1121 /* If the load is permuted then the alignment is determined by
1122 the first group element, not by the first scalar stmt DR. */
1123 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1124 /* Record the cost for the permutation. */
1125 unsigned n_perms;
1126 unsigned assumed_nunits
1127 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1128 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1129 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1130 slp_vf, instance, true,
1131 &n_perms);
1132 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1133 first_stmt_info, 0, vect_body);
1134 /* And adjust the number of loads performed. This handles
1135 redundancies as well as loads that are later dead. */
1136 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1137 bitmap_clear (perm);
1138 for (unsigned i = 0;
1139 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1140 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1141 ncopies = 0;
1142 bool load_seen = false;
1143 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1145 if (i % assumed_nunits == 0)
1147 if (load_seen)
1148 ncopies++;
1149 load_seen = false;
1151 if (bitmap_bit_p (perm, i))
1152 load_seen = true;
1154 if (load_seen)
1155 ncopies++;
1156 gcc_assert (ncopies
1157 <= (DR_GROUP_SIZE (first_stmt_info)
1158 - DR_GROUP_GAP (first_stmt_info)
1159 + assumed_nunits - 1) / assumed_nunits);
1162 /* Grouped loads read all elements in the group at once,
1163 so we want the DR for the first statement. */
1164 stmt_vec_info first_stmt_info = stmt_info;
1165 if (!slp_node && grouped_access_p)
1166 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1168 /* True if we should include any once-per-group costs as well as
1169 the cost of the statement itself. For SLP we only get called
1170 once per group anyhow. */
1171 bool first_stmt_p = (first_stmt_info == stmt_info);
1173 /* We assume that the cost of a single load-lanes instruction is
1174 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1175 access is instead being provided by a load-and-permute operation,
1176 include the cost of the permutes. */
1177 if (first_stmt_p
1178 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1180 /* Uses even and odd extract operations or shuffle operations
1181 for each needed permute. */
1182 int group_size = DR_GROUP_SIZE (first_stmt_info);
1183 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1184 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1185 stmt_info, 0, vect_body);
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: strided group_size = %d .\n",
1190 group_size);
1193 /* The loads themselves. */
1194 if (memory_access_type == VMAT_ELEMENTWISE
1195 || memory_access_type == VMAT_GATHER_SCATTER)
1197 /* N scalar loads plus gathering them into a vector. */
1198 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1199 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1200 inside_cost += record_stmt_cost (cost_vec,
1201 ncopies * assumed_nunits,
1202 scalar_load, stmt_info, 0, vect_body);
1204 else
1205 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1206 &inside_cost, &prologue_cost,
1207 cost_vec, cost_vec, true);
1208 if (memory_access_type == VMAT_ELEMENTWISE
1209 || memory_access_type == VMAT_STRIDED_SLP)
1210 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1211 stmt_info, 0, vect_body);
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE, vect_location,
1215 "vect_model_load_cost: inside_cost = %d, "
1216 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1220 /* Calculate cost of DR's memory access. */
1221 void
1222 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1223 bool add_realign_cost, unsigned int *inside_cost,
1224 unsigned int *prologue_cost,
1225 stmt_vector_for_cost *prologue_cost_vec,
1226 stmt_vector_for_cost *body_cost_vec,
1227 bool record_prologue_costs)
1229 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1230 int alignment_support_scheme
1231 = vect_supportable_dr_alignment (dr_info, false);
1233 switch (alignment_support_scheme)
1235 case dr_aligned:
1237 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1238 stmt_info, 0, vect_body);
1240 if (dump_enabled_p ())
1241 dump_printf_loc (MSG_NOTE, vect_location,
1242 "vect_model_load_cost: aligned.\n");
1244 break;
1246 case dr_unaligned_supported:
1248 /* Here, we assign an additional cost for the unaligned load. */
1249 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1250 unaligned_load, stmt_info,
1251 DR_MISALIGNMENT (dr_info),
1252 vect_body);
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE, vect_location,
1256 "vect_model_load_cost: unaligned supported by "
1257 "hardware.\n");
1259 break;
1261 case dr_explicit_realign:
1263 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1264 vector_load, stmt_info, 0, vect_body);
1265 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1266 vec_perm, stmt_info, 0, vect_body);
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1270 prologue costs. */
1271 if (targetm.vectorize.builtin_mask_for_load)
1272 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1273 stmt_info, 0, vect_body);
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE, vect_location,
1277 "vect_model_load_cost: explicit realign\n");
1279 break;
1281 case dr_explicit_realign_optimized:
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "vect_model_load_cost: unaligned software "
1286 "pipelined.\n");
1288 /* Unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1295 if (add_realign_cost && record_prologue_costs)
1297 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1298 vector_stmt, stmt_info,
1299 0, vect_prologue);
1300 if (targetm.vectorize.builtin_mask_for_load)
1301 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1302 vector_stmt, stmt_info,
1303 0, vect_prologue);
1306 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1307 stmt_info, 0, vect_body);
1308 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1309 stmt_info, 0, vect_body);
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE, vect_location,
1313 "vect_model_load_cost: explicit realign optimized"
1314 "\n");
1316 break;
1319 case dr_unaligned_unsupported:
1321 *inside_cost = VECT_MAX_COST;
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1325 "vect_model_load_cost: unsupported access.\n");
1326 break;
1329 default:
1330 gcc_unreachable ();
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
1337 static void
1338 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1339 gimple_stmt_iterator *gsi)
1341 if (gsi)
1342 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1343 else
1345 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1347 if (loop_vinfo)
1349 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1350 basic_block new_bb;
1351 edge pe;
1353 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1354 loop = loop->inner;
1356 pe = loop_preheader_edge (loop);
1357 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1358 gcc_assert (!new_bb);
1360 else
1362 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1363 basic_block bb;
1364 gimple_stmt_iterator gsi_bb_start;
1366 gcc_assert (bb_vinfo);
1367 bb = BB_VINFO_BB (bb_vinfo);
1368 gsi_bb_start = gsi_after_labels (bb);
1369 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1373 if (dump_enabled_p ())
1374 dump_printf_loc (MSG_NOTE, vect_location,
1375 "created new init_stmt: %G", new_stmt);
1378 /* Function vect_init_vector.
1380 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1381 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1382 vector type, a vector with all elements equal to VAL is created first.
1383 Place the initialization at BSI if it is not NULL. Otherwise, place the
1384 initialization at the loop preheader.
1385 Return the DEF of INIT_STMT.
1386 It will be used in the vectorization of STMT_INFO. */
1388 tree
1389 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1390 gimple_stmt_iterator *gsi)
1392 gimple *init_stmt;
1393 tree new_temp;
1395 /* We abuse this function to push something to an SSA name with initial 'val'. */
1396 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1398 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1399 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1401 /* A scalar boolean value should be transformed into
1402 an all-zeros or all-ones value before building a vector. */
1403 if (VECTOR_BOOLEAN_TYPE_P (type))
1405 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1406 tree false_val = build_zero_cst (TREE_TYPE (type));
1408 if (CONSTANT_CLASS_P (val))
1409 val = integer_zerop (val) ? false_val : true_val;
1410 else
1412 new_temp = make_ssa_name (TREE_TYPE (type));
1413 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1414 val, true_val, false_val);
1415 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1416 val = new_temp;
1419 else if (CONSTANT_CLASS_P (val))
1420 val = fold_convert (TREE_TYPE (type), val);
1421 else
1423 new_temp = make_ssa_name (TREE_TYPE (type));
1424 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1425 init_stmt = gimple_build_assign (new_temp,
1426 fold_build1 (VIEW_CONVERT_EXPR,
1427 TREE_TYPE (type),
1428 val));
1429 else
1430 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1431 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1432 val = new_temp;
1435 val = build_vector_from_val (type, val);
1438 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1439 init_stmt = gimple_build_assign (new_temp, val);
1440 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1441 return new_temp;
1444 /* Function vect_get_vec_def_for_operand_1.
1446 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1447 with type DT that will be used in the vectorized stmt. */
1449 tree
1450 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1451 enum vect_def_type dt)
1453 tree vec_oprnd;
1454 stmt_vec_info vec_stmt_info;
1456 switch (dt)
1458 /* operand is a constant or a loop invariant. */
1459 case vect_constant_def:
1460 case vect_external_def:
1461 /* Code should use vect_get_vec_def_for_operand. */
1462 gcc_unreachable ();
1464 /* Operand is defined by a loop header phi. In case of nested
1465 cycles we also may have uses of the backedge def. */
1466 case vect_reduction_def:
1467 case vect_double_reduction_def:
1468 case vect_nested_cycle:
1469 case vect_induction_def:
1470 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1471 || dt == vect_nested_cycle);
1472 /* Fallthru. */
1474 /* operand is defined inside the loop. */
1475 case vect_internal_def:
1477 /* Get the def from the vectorized stmt. */
1478 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1479 /* Get vectorized pattern statement. */
1480 if (!vec_stmt_info
1481 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1482 && !STMT_VINFO_RELEVANT (def_stmt_info))
1483 vec_stmt_info = (STMT_VINFO_VEC_STMT
1484 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1485 gcc_assert (vec_stmt_info);
1486 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1487 vec_oprnd = PHI_RESULT (phi);
1488 else
1489 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1490 return vec_oprnd;
1493 default:
1494 gcc_unreachable ();
1499 /* Function vect_get_vec_def_for_operand.
1501 OP is an operand in STMT_VINFO. This function returns a (vector) def
1502 that will be used in the vectorized stmt for STMT_VINFO.
1504 In the case that OP is an SSA_NAME which is defined in the loop, then
1505 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1507 In case OP is an invariant or constant, a new stmt that creates a vector def
1508 needs to be introduced. VECTYPE may be used to specify a required type for
1509 vector invariant. */
1511 tree
1512 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1514 gimple *def_stmt;
1515 enum vect_def_type dt;
1516 bool is_simple_use;
1517 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location,
1521 "vect_get_vec_def_for_operand: %T\n", op);
1523 stmt_vec_info def_stmt_info;
1524 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1525 &def_stmt_info, &def_stmt);
1526 gcc_assert (is_simple_use);
1527 if (def_stmt && dump_enabled_p ())
1528 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1530 if (dt == vect_constant_def || dt == vect_external_def)
1532 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1533 tree vector_type;
1535 if (vectype)
1536 vector_type = vectype;
1537 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1538 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1539 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1540 else
1541 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1543 gcc_assert (vector_type);
1544 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1546 else
1547 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1551 /* Function vect_get_vec_def_for_stmt_copy
1553 Return a vector-def for an operand. This function is used when the
1554 vectorized stmt to be created (by the caller to this function) is a "copy"
1555 created in case the vectorized result cannot fit in one vector, and several
1556 copies of the vector-stmt are required. In this case the vector-def is
1557 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1558 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1560 Context:
1561 In case the vectorization factor (VF) is bigger than the number
1562 of elements that can fit in a vectype (nunits), we have to generate
1563 more than one vector stmt to vectorize the scalar stmt. This situation
1564 arises when there are multiple data-types operated upon in the loop; the
1565 smallest data-type determines the VF, and as a result, when vectorizing
1566 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1567 vector stmt (each computing a vector of 'nunits' results, and together
1568 computing 'VF' results in each iteration). This function is called when
1569 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1570 which VF=16 and nunits=4, so the number of copies required is 4):
1572 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1574 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1575 VS1.1: vx.1 = memref1 VS1.2
1576 VS1.2: vx.2 = memref2 VS1.3
1577 VS1.3: vx.3 = memref3
1579 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1580 VSnew.1: vz1 = vx.1 + ... VSnew.2
1581 VSnew.2: vz2 = vx.2 + ... VSnew.3
1582 VSnew.3: vz3 = vx.3 + ...
1584 The vectorization of S1 is explained in vectorizable_load.
1585 The vectorization of S2:
1586 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1587 the function 'vect_get_vec_def_for_operand' is called to
1588 get the relevant vector-def for each operand of S2. For operand x it
1589 returns the vector-def 'vx.0'.
1591 To create the remaining copies of the vector-stmt (VSnew.j), this
1592 function is called to get the relevant vector-def for each operand. It is
1593 obtained from the respective VS1.j stmt, which is recorded in the
1594 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1596 For example, to obtain the vector-def 'vx.1' in order to create the
1597 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1598 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1599 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1600 and return its def ('vx.1').
1601 Overall, to create the above sequence this function will be called 3 times:
1602 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1603 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1604 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1606 tree
1607 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1609 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1610 if (!def_stmt_info)
1611 /* Do nothing; can reuse same def. */
1612 return vec_oprnd;
1614 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1615 gcc_assert (def_stmt_info);
1616 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1617 vec_oprnd = PHI_RESULT (phi);
1618 else
1619 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1620 return vec_oprnd;
1624 /* Get vectorized definitions for the operands to create a copy of an original
1625 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1627 void
1628 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1629 vec<tree> *vec_oprnds0,
1630 vec<tree> *vec_oprnds1)
1632 tree vec_oprnd = vec_oprnds0->pop ();
1634 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1635 vec_oprnds0->quick_push (vec_oprnd);
1637 if (vec_oprnds1 && vec_oprnds1->length ())
1639 vec_oprnd = vec_oprnds1->pop ();
1640 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1641 vec_oprnds1->quick_push (vec_oprnd);
1646 /* Get vectorized definitions for OP0 and OP1. */
1648 void
1649 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1650 vec<tree> *vec_oprnds0,
1651 vec<tree> *vec_oprnds1,
1652 slp_tree slp_node)
1654 if (slp_node)
1656 int nops = (op1 == NULL_TREE) ? 1 : 2;
1657 auto_vec<tree> ops (nops);
1658 auto_vec<vec<tree> > vec_defs (nops);
1660 ops.quick_push (op0);
1661 if (op1)
1662 ops.quick_push (op1);
1664 vect_get_slp_defs (ops, slp_node, &vec_defs);
1666 *vec_oprnds0 = vec_defs[0];
1667 if (op1)
1668 *vec_oprnds1 = vec_defs[1];
1670 else
1672 tree vec_oprnd;
1674 vec_oprnds0->create (1);
1675 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1676 vec_oprnds0->quick_push (vec_oprnd);
1678 if (op1)
1680 vec_oprnds1->create (1);
1681 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1682 vec_oprnds1->quick_push (vec_oprnd);
1687 /* Helper function called by vect_finish_replace_stmt and
1688 vect_finish_stmt_generation. Set the location of the new
1689 statement and create and return a stmt_vec_info for it. */
1691 static stmt_vec_info
1692 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1694 vec_info *vinfo = stmt_info->vinfo;
1696 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1698 if (dump_enabled_p ())
1699 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1701 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1703 /* While EH edges will generally prevent vectorization, stmt might
1704 e.g. be in a must-not-throw region. Ensure newly created stmts
1705 that could throw are part of the same region. */
1706 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1707 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1708 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1710 return vec_stmt_info;
1713 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1714 which sets the same scalar result as STMT_INFO did. Create and return a
1715 stmt_vec_info for VEC_STMT. */
1717 stmt_vec_info
1718 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1720 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1722 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1723 gsi_replace (&gsi, vec_stmt, true);
1725 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1728 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1729 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1731 stmt_vec_info
1732 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1733 gimple_stmt_iterator *gsi)
1735 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1737 if (!gsi_end_p (*gsi)
1738 && gimple_has_mem_ops (vec_stmt))
1740 gimple *at_stmt = gsi_stmt (*gsi);
1741 tree vuse = gimple_vuse (at_stmt);
1742 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1744 tree vdef = gimple_vdef (at_stmt);
1745 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1746 /* If we have an SSA vuse and insert a store, update virtual
1747 SSA form to avoid triggering the renamer. Do so only
1748 if we can easily see all uses - which is what almost always
1749 happens with the way vectorized stmts are inserted. */
1750 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1751 && ((is_gimple_assign (vec_stmt)
1752 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1753 || (is_gimple_call (vec_stmt)
1754 && !(gimple_call_flags (vec_stmt)
1755 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1757 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1758 gimple_set_vdef (vec_stmt, new_vdef);
1759 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1763 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1764 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1767 /* We want to vectorize a call to combined function CFN with function
1768 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1769 as the types of all inputs. Check whether this is possible using
1770 an internal function, returning its code if so or IFN_LAST if not. */
1772 static internal_fn
1773 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1774 tree vectype_out, tree vectype_in)
1776 internal_fn ifn;
1777 if (internal_fn_p (cfn))
1778 ifn = as_internal_fn (cfn);
1779 else
1780 ifn = associated_internal_fn (fndecl);
1781 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1783 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1784 if (info.vectorizable)
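/* A negative type index refers to the return value, so map it to the
   output vector type; otherwise use the input vector type.  */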
1786 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1787 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1788 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1789 OPTIMIZE_FOR_SPEED))
1790 return ifn;
1793 return IFN_LAST;
1797 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1798 gimple_stmt_iterator *);
1800 /* Check whether a load or store statement in the loop described by
1801 LOOP_VINFO is possible in a fully-masked loop. This is testing
1802 whether the vectorizer pass has the appropriate support, as well as
1803 whether the target does.
1805 VLS_TYPE says whether the statement is a load or store and VECTYPE
1806 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1807 says how the load or store is going to be implemented and GROUP_SIZE
1808 is the number of load or store statements in the containing group.
1809 If the access is a gather load or scatter store, GS_INFO describes
1810 its arguments.
1812 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1813 supported, otherwise record the required mask types. */
1815 static void
1816 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1817 vec_load_store_type vls_type, int group_size,
1818 vect_memory_access_type memory_access_type,
1819 gather_scatter_info *gs_info)
1821 /* Invariant loads need no special support. */
1822 if (memory_access_type == VMAT_INVARIANT)
1823 return;
1825 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1826 machine_mode vecmode = TYPE_MODE (vectype);
1827 bool is_load = (vls_type == VLS_LOAD);
1828 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1830 if (is_load
1831 ? !vect_load_lanes_supported (vectype, group_size, true)
1832 : !vect_store_lanes_supported (vectype, group_size, true))
1834 if (dump_enabled_p ())
1835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1836 "can't use a fully-masked loop because the"
1837 " target doesn't have an appropriate masked"
1838 " load/store-lanes instruction.\n");
1839 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1840 return;
1842 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1843 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1844 return;
1847 if (memory_access_type == VMAT_GATHER_SCATTER)
1849 internal_fn ifn = (is_load
1850 ? IFN_MASK_GATHER_LOAD
1851 : IFN_MASK_SCATTER_STORE);
1852 tree offset_type = TREE_TYPE (gs_info->offset);
1853 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1854 gs_info->memory_type,
1855 TYPE_SIGN (offset_type),
1856 gs_info->scale))
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1860 "can't use a fully-masked loop because the"
1861 " target doesn't have an appropriate masked"
1862 " gather load or scatter store instruction.\n");
1863 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1864 return;
1866 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1867 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1868 return;
1871 if (memory_access_type != VMAT_CONTIGUOUS
1872 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1874 /* Element X of the data must come from iteration i * VF + X of the
1875 scalar loop. We need more work to support other mappings. */
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1878 "can't use a fully-masked loop because an access"
1879 " isn't contiguous.\n");
1880 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1881 return;
1884 machine_mode mask_mode;
1885 if (!(targetm.vectorize.get_mask_mode
1886 (GET_MODE_NUNITS (vecmode),
1887 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1888 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1890 if (dump_enabled_p ())
1891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1892 "can't use a fully-masked loop because the target"
1893 " doesn't have the appropriate masked load or"
1894 " store.\n");
1895 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1896 return;
1898 /* We might load more scalars than we need for permuting SLP loads.
1899 We checked in get_group_load_store_type that the extra elements
1900 don't leak into a new vector. */
1901 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1902 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1903 unsigned int nvectors;
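/* The number of mask vectors needed is the number of vectors the group
   accesses per vector iteration: CEIL (GROUP_SIZE * VF / NUNITS).  */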
1904 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1905 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1906 else
1907 gcc_unreachable ();
1910 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1911 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1912 that needs to be applied to all loads and stores in a vectorized loop.
1913 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1915 MASK_TYPE is the type of both masks. If new statements are needed,
1916 insert them before GSI. */
1918 static tree
1919 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1920 gimple_stmt_iterator *gsi)
1922 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1923 if (!loop_mask)
1924 return vec_mask;
1926 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
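/* Otherwise AND the two masks together so that lanes disabled by the
   loop mask stay inactive.  */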
1927 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1928 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1929 vec_mask, loop_mask);
1930 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1931 return and_res;
1934 /* Determine whether we can use a gather load or scatter store to vectorize
1935 strided load or store STMT_INFO by truncating the current offset to a
1936 smaller width. We need to be able to construct an offset vector:
1938 { 0, X, X*2, X*3, ... }
1940 without loss of precision, where X is STMT_INFO's DR_STEP.
1942 Return true if this is possible, describing the gather load or scatter
1943 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1945 static bool
1946 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1947 loop_vec_info loop_vinfo, bool masked_p,
1948 gather_scatter_info *gs_info)
1950 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1951 data_reference *dr = dr_info->dr;
1952 tree step = DR_STEP (dr);
1953 if (TREE_CODE (step) != INTEGER_CST)
1955 /* ??? Perhaps we could use range information here? */
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE, vect_location,
1958 "cannot truncate variable step.\n");
1959 return false;
1962 /* Get the number of bits in an element. */
1963 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1964 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1965 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1967 /* Set COUNT to the upper limit on the number of elements - 1.
1968 Start with the maximum vectorization factor. */
1969 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1971 /* Try lowering COUNT to the number of scalar latch iterations. */
1972 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1973 widest_int max_iters;
1974 if (max_loop_iterations (loop, &max_iters)
1975 && max_iters < count)
1976 count = max_iters.to_shwi ();
1978 /* Try scales of 1 and the element size. */
1979 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
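/* A scale equal to the element size lets the offset vector count elements
   rather than bytes, which can need fewer bits for the same range.  */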
1980 wi::overflow_type overflow = wi::OVF_NONE;
1981 for (int i = 0; i < 2; ++i)
1983 int scale = scales[i];
1984 widest_int factor;
1985 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1986 continue;
1988 /* See whether we can calculate COUNT * STEP / SCALE
1989 in ELEMENT_BITS bits. */
1990 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1991 if (overflow)
1992 continue;
1993 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1994 if (wi::min_precision (range, sign) > element_bits)
1996 overflow = wi::OVF_UNKNOWN;
1997 continue;
2000 /* See whether the target supports the operation. */
2001 tree memory_type = TREE_TYPE (DR_REF (dr));
2002 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2003 memory_type, element_bits, sign, scale,
2004 &gs_info->ifn, &gs_info->element_type))
2005 continue;
2007 tree offset_type = build_nonstandard_integer_type (element_bits,
2008 sign == UNSIGNED);
2010 gs_info->decl = NULL_TREE;
2011 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2012 but we don't need to store that here. */
2013 gs_info->base = NULL_TREE;
2014 gs_info->offset = fold_convert (offset_type, step);
2015 gs_info->offset_dt = vect_constant_def;
2016 gs_info->offset_vectype = NULL_TREE;
2017 gs_info->scale = scale;
2018 gs_info->memory_type = memory_type;
2019 return true;
2022 if (overflow && dump_enabled_p ())
2023 dump_printf_loc (MSG_NOTE, vect_location,
2024 "truncating gather/scatter offset to %d bits"
2025 " might change its value.\n", element_bits);
2027 return false;
2030 /* Return true if we can use gather/scatter internal functions to
2031 vectorize STMT_INFO, which is a grouped or strided load or store.
2032 MASKED_P is true if load or store is conditional. When returning
2033 true, fill in GS_INFO with the information required to perform the
2034 operation. */
2036 static bool
2037 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2038 loop_vec_info loop_vinfo, bool masked_p,
2039 gather_scatter_info *gs_info)
2041 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2042 || gs_info->decl)
2043 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2044 masked_p, gs_info);
2046 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2047 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2048 tree offset_type = TREE_TYPE (gs_info->offset);
2049 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2051 /* Enforced by vect_check_gather_scatter. */
2052 gcc_assert (element_bits >= offset_bits);
2054 /* If the elements are wider than the offset, convert the offset to the
2055 same width, without changing its sign. */
2056 if (element_bits > offset_bits)
2058 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2059 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2060 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2063 if (dump_enabled_p ())
2064 dump_printf_loc (MSG_NOTE, vect_location,
2065 "using gather/scatter for strided/grouped access,"
2066 " scale = %d\n", gs_info->scale);
2068 return true;
2071 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2072 elements with a known constant step. Return -1 if that step
2073 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2075 static int
2076 compare_step_with_zero (stmt_vec_info stmt_info)
2078 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2079 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2080 size_zero_node);
2083 /* If the target supports a permute mask that reverses the elements in
2084 a vector of type VECTYPE, return that mask, otherwise return null. */
2086 static tree
2087 perm_mask_for_reverse (tree vectype)
2089 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2091 /* The encoding has a single stepped pattern. */
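/* For example, with 8 elements the encoded values { 7, 6, 5 } extend to
   the full series { 7, 6, 5, 4, 3, 2, 1, 0 }.  */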
2092 vec_perm_builder sel (nunits, 1, 3);
2093 for (int i = 0; i < 3; ++i)
2094 sel.quick_push (nunits - 1 - i);
2096 vec_perm_indices indices (sel, 1, nunits);
2097 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2098 return NULL_TREE;
2099 return vect_gen_perm_mask_checked (vectype, indices);
2102 /* STMT_INFO is either a masked or unconditional store. Return the value
2103 being stored. */
2105 tree
2106 vect_get_store_rhs (stmt_vec_info stmt_info)
2108 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2110 gcc_assert (gimple_assign_single_p (assign));
2111 return gimple_assign_rhs1 (assign);
2113 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2115 internal_fn ifn = gimple_call_internal_fn (call);
2116 int index = internal_fn_stored_value_index (ifn);
2117 gcc_assert (index >= 0);
2118 return gimple_call_arg (call, index);
2120 gcc_unreachable ();
2123 /* A subroutine of get_load_store_type, with a subset of the same
2124 arguments. Handle the case where STMT_INFO is part of a grouped load
2125 or store.
2127 For stores, the statements in the group are all consecutive
2128 and there is no gap at the end. For loads, the statements in the
2129 group might not be consecutive; there can be gaps between statements
2130 as well as at the end. */
2132 static bool
2133 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2134 bool masked_p, vec_load_store_type vls_type,
2135 vect_memory_access_type *memory_access_type,
2136 gather_scatter_info *gs_info)
2138 vec_info *vinfo = stmt_info->vinfo;
2139 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2140 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2141 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2142 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2143 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2144 bool single_element_p = (stmt_info == first_stmt_info
2145 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2146 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2147 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2149 /* True if the vectorized statements would access beyond the last
2150 statement in the group. */
2151 bool overrun_p = false;
2153 /* True if we can cope with such overrun by peeling for gaps, so that
2154 there is at least one final scalar iteration after the vector loop. */
2155 bool can_overrun_p = (!masked_p
2156 && vls_type == VLS_LOAD
2157 && loop_vinfo
2158 && !loop->inner);
2160 /* There can only be a gap at the end of the group if the stride is
2161 known at compile time. */
2162 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2164 /* Stores can't yet have gaps. */
2165 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2167 if (slp)
2169 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2171 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2172 separated by the stride, until we have a complete vector.
2173 Fall back to scalar accesses if that isn't possible. */
2174 if (multiple_p (nunits, group_size))
2175 *memory_access_type = VMAT_STRIDED_SLP;
2176 else
2177 *memory_access_type = VMAT_ELEMENTWISE;
2179 else
2181 overrun_p = loop_vinfo && gap != 0;
2182 if (overrun_p && vls_type != VLS_LOAD)
2184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2185 "Grouped store with gaps requires"
2186 " non-consecutive accesses\n");
2187 return false;
2189 /* An overrun is fine if the trailing elements are smaller
2190 than the alignment boundary B. Every vector access will
2191 be a multiple of B and so we are guaranteed to access a
2192 non-gap element in the same B-sized block. */
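/* For example, with 4-byte elements and a known 16-byte alignment,
   a trailing gap of up to 3 elements stays within the same 16-byte
   block as a real element, so accessing it cannot fault.  */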
2193 if (overrun_p
2194 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2195 / vect_get_scalar_dr_size (first_dr_info)))
2196 overrun_p = false;
2197 if (overrun_p && !can_overrun_p)
2199 if (dump_enabled_p ())
2200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2201 "Peeling for outer loop is not supported\n");
2202 return false;
2204 *memory_access_type = VMAT_CONTIGUOUS;
2207 else
2209 /* We can always handle this case using elementwise accesses,
2210 but see if something more efficient is available. */
2211 *memory_access_type = VMAT_ELEMENTWISE;
2213 /* If there is a gap at the end of the group then these optimizations
2214 would access excess elements in the last iteration. */
2215 bool would_overrun_p = (gap != 0);
2216 /* An overrun is fine if the trailing elements are smaller than the
2217 alignment boundary B. Every vector access will be a multiple of B
2218 and so we are guaranteed to access a non-gap element in the
2219 same B-sized block. */
2220 if (would_overrun_p
2221 && !masked_p
2222 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2223 / vect_get_scalar_dr_size (first_dr_info)))
2224 would_overrun_p = false;
2226 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2227 && (can_overrun_p || !would_overrun_p)
2228 && compare_step_with_zero (stmt_info) > 0)
2230 /* First cope with the degenerate case of a single-element
2231 vector. */
2232 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2233 *memory_access_type = VMAT_CONTIGUOUS;
2235 /* Otherwise try using LOAD/STORE_LANES. */
2236 if (*memory_access_type == VMAT_ELEMENTWISE
2237 && (vls_type == VLS_LOAD
2238 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2239 : vect_store_lanes_supported (vectype, group_size,
2240 masked_p)))
2242 *memory_access_type = VMAT_LOAD_STORE_LANES;
2243 overrun_p = would_overrun_p;
2246 /* If that fails, try using permuting loads or stores. */
2247 if (*memory_access_type == VMAT_ELEMENTWISE
2248 && (vls_type == VLS_LOAD
2249 ? vect_grouped_load_supported (vectype, single_element_p,
2250 group_size)
2251 : vect_grouped_store_supported (vectype, group_size)))
2253 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2254 overrun_p = would_overrun_p;
2258 /* As a last resort, try using a gather load or scatter store.
2260 ??? Although the code can handle all group sizes correctly,
2261 it probably isn't a win to use separate strided accesses based
2262 on nearby locations. Or, even if it's a win over scalar code,
2263 it might not be a win over vectorizing at a lower VF, if that
2264 allows us to use contiguous accesses. */
2265 if (*memory_access_type == VMAT_ELEMENTWISE
2266 && single_element_p
2267 && loop_vinfo
2268 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2269 masked_p, gs_info))
2270 *memory_access_type = VMAT_GATHER_SCATTER;
2273 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2275 /* STMT_INFO is the leader of the group. Check the operands of all the
2276 stmts of the group. */
2277 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2278 while (next_stmt_info)
2280 tree op = vect_get_store_rhs (next_stmt_info);
2281 enum vect_def_type dt;
2282 if (!vect_is_simple_use (op, vinfo, &dt))
2284 if (dump_enabled_p ())
2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286 "use not simple.\n");
2287 return false;
2289 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2293 if (overrun_p)
2295 gcc_assert (can_overrun_p);
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2298 "Data access with gaps requires scalar "
2299 "epilogue loop\n");
2300 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2303 return true;
2306 /* A subroutine of get_load_store_type, with a subset of the same
2307 arguments. Handle the case where STMT_INFO is a load or store that
2308 accesses consecutive elements with a negative step. */
2310 static vect_memory_access_type
2311 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2312 vec_load_store_type vls_type,
2313 unsigned int ncopies)
2315 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2316 dr_alignment_support alignment_support_scheme;
2318 if (ncopies > 1)
2320 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2322 "multiple types with negative step.\n");
2323 return VMAT_ELEMENTWISE;
2326 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2327 if (alignment_support_scheme != dr_aligned
2328 && alignment_support_scheme != dr_unaligned_supported)
2330 if (dump_enabled_p ())
2331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2332 "negative step but alignment required.\n");
2333 return VMAT_ELEMENTWISE;
2336 if (vls_type == VLS_STORE_INVARIANT)
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_NOTE, vect_location,
2340 "negative step with invariant source;"
2341 " no permute needed.\n");
2342 return VMAT_CONTIGUOUS_DOWN;
2345 if (!perm_mask_for_reverse (vectype))
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2349 "negative step and reversing not supported.\n");
2350 return VMAT_ELEMENTWISE;
2353 return VMAT_CONTIGUOUS_REVERSE;
2356 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2357 if there is a memory access type that the vectorized form can use,
2358 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2359 or scatters, fill in GS_INFO accordingly.
2361 SLP says whether we're performing SLP rather than loop vectorization.
2362 MASKED_P is true if the statement is conditional on a vectorized mask.
2363 VECTYPE is the vector type that the vectorized statements will use.
2364 NCOPIES is the number of vector statements that will be needed. */
2366 static bool
2367 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2368 bool masked_p, vec_load_store_type vls_type,
2369 unsigned int ncopies,
2370 vect_memory_access_type *memory_access_type,
2371 gather_scatter_info *gs_info)
2373 vec_info *vinfo = stmt_info->vinfo;
2374 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2375 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2376 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2378 *memory_access_type = VMAT_GATHER_SCATTER;
2379 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2380 gcc_unreachable ();
2381 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2382 &gs_info->offset_dt,
2383 &gs_info->offset_vectype))
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "%s index use not simple.\n",
2388 vls_type == VLS_LOAD ? "gather" : "scatter");
2389 return false;
2392 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2394 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2395 vls_type, memory_access_type, gs_info))
2396 return false;
2398 else if (STMT_VINFO_STRIDED_P (stmt_info))
2400 gcc_assert (!slp);
2401 if (loop_vinfo
2402 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2403 masked_p, gs_info))
2404 *memory_access_type = VMAT_GATHER_SCATTER;
2405 else
2406 *memory_access_type = VMAT_ELEMENTWISE;
2408 else
2410 int cmp = compare_step_with_zero (stmt_info);
2411 if (cmp < 0)
2412 *memory_access_type = get_negative_load_store_type
2413 (stmt_info, vectype, vls_type, ncopies);
2414 else if (cmp == 0)
2416 gcc_assert (vls_type == VLS_LOAD);
2417 *memory_access_type = VMAT_INVARIANT;
2419 else
2420 *memory_access_type = VMAT_CONTIGUOUS;
2423 if ((*memory_access_type == VMAT_ELEMENTWISE
2424 || *memory_access_type == VMAT_STRIDED_SLP)
2425 && !nunits.is_constant ())
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "Not using elementwise accesses due to variable "
2430 "vectorization factor.\n");
2431 return false;
2434 /* FIXME: At the moment the cost model seems to underestimate the
2435 cost of using elementwise accesses. This check preserves the
2436 traditional behavior until that can be fixed. */
2437 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2438 if (!first_stmt_info)
2439 first_stmt_info = stmt_info;
2440 if (*memory_access_type == VMAT_ELEMENTWISE
2441 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2442 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2443 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2444 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2446 if (dump_enabled_p ())
2447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2448 "not falling back to elementwise accesses\n");
2449 return false;
2451 return true;
2454 /* Return true if boolean argument MASK is suitable for vectorizing
2455 conditional load or store STMT_INFO. When returning true, store the type
2456 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2457 in *MASK_VECTYPE_OUT. */
2459 static bool
2460 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2461 vect_def_type *mask_dt_out,
2462 tree *mask_vectype_out)
2464 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2466 if (dump_enabled_p ())
2467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2468 "mask argument is not a boolean.\n");
2469 return false;
2472 if (TREE_CODE (mask) != SSA_NAME)
2474 if (dump_enabled_p ())
2475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2476 "mask argument is not an SSA name.\n");
2477 return false;
2480 enum vect_def_type mask_dt;
2481 tree mask_vectype;
2482 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2484 if (dump_enabled_p ())
2485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2486 "mask use not simple.\n");
2487 return false;
2490 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2491 if (!mask_vectype)
2492 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2494 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2496 if (dump_enabled_p ())
2497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2498 "could not find an appropriate vector mask type.\n");
2499 return false;
2502 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2503 TYPE_VECTOR_SUBPARTS (vectype)))
2505 if (dump_enabled_p ())
2506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2507 "vector mask type %T",
2508 " does not match vector data type %T.\n",
2509 mask_vectype, vectype);
2511 return false;
2514 *mask_dt_out = mask_dt;
2515 *mask_vectype_out = mask_vectype;
2516 return true;
2519 /* Return true if stored value RHS is suitable for vectorizing store
2520 statement STMT_INFO. When returning true, store the type of the
2521 definition in *RHS_DT_OUT, the type of the vectorized store value in
2522 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2524 static bool
2525 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2526 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2527 vec_load_store_type *vls_type_out)
2529 /* If this is a store from a constant, make sure
2530 native_encode_expr can handle it. */
2531 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2533 if (dump_enabled_p ())
2534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2535 "cannot encode constant as a byte sequence.\n");
2536 return false;
2539 enum vect_def_type rhs_dt;
2540 tree rhs_vectype;
2541 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 "use not simple.\n");
2546 return false;
2549 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2550 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2552 if (dump_enabled_p ())
2553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2554 "incompatible vector types.\n");
2555 return false;
2558 *rhs_dt_out = rhs_dt;
2559 *rhs_vectype_out = rhs_vectype;
2560 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2561 *vls_type_out = VLS_STORE_INVARIANT;
2562 else
2563 *vls_type_out = VLS_STORE;
2564 return true;
2567 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2568 Note that we support masks with floating-point type, in which case the
2569 floats are interpreted as a bitmask. */
2571 static tree
2572 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2574 if (TREE_CODE (masktype) == INTEGER_TYPE)
2575 return build_int_cst (masktype, -1);
2576 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2578 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2579 mask = build_vector_from_val (masktype, mask);
2580 return vect_init_vector (stmt_info, mask, masktype, NULL);
2582 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2584 REAL_VALUE_TYPE r;
2585 long tmp[6];
2586 for (int j = 0; j < 6; ++j)
2587 tmp[j] = -1;
2588 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2589 tree mask = build_real (TREE_TYPE (masktype), r);
2590 mask = build_vector_from_val (masktype, mask);
2591 return vect_init_vector (stmt_info, mask, masktype, NULL);
2593 gcc_unreachable ();
2596 /* Build an all-zero merge value of type VECTYPE while vectorizing
2597 STMT_INFO as a gather load. */
2599 static tree
2600 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2602 tree merge;
2603 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2604 merge = build_int_cst (TREE_TYPE (vectype), 0);
2605 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2607 REAL_VALUE_TYPE r;
2608 long tmp[6];
2609 for (int j = 0; j < 6; ++j)
2610 tmp[j] = 0;
2611 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2612 merge = build_real (TREE_TYPE (vectype), r);
2614 else
2615 gcc_unreachable ();
2616 merge = build_vector_from_val (vectype, merge);
2617 return vect_init_vector (stmt_info, merge, vectype, NULL);
2620 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2621 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2622 the gather load operation. If the load is conditional, MASK is the
2623 unvectorized condition and MASK_DT is its definition type, otherwise
2624 MASK is null. */
2626 static void
2627 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2628 gimple_stmt_iterator *gsi,
2629 stmt_vec_info *vec_stmt,
2630 gather_scatter_info *gs_info,
2631 tree mask)
2633 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2634 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2635 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2636 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2637 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2638 edge pe = loop_preheader_edge (loop);
2639 enum { NARROW, NONE, WIDEN } modifier;
2640 poly_uint64 gather_off_nunits
2641 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2643 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2644 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2645 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2646 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2647 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2648 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2649 tree scaletype = TREE_VALUE (arglist);
2650 tree real_masktype = masktype;
2651 gcc_checking_assert (types_compatible_p (srctype, rettype)
2652 && (!mask
2653 || TREE_CODE (masktype) == INTEGER_TYPE
2654 || types_compatible_p (srctype, masktype)));
2655 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2656 masktype = build_same_sized_truth_vector_type (srctype);
2658 tree mask_halftype = masktype;
2659 tree perm_mask = NULL_TREE;
2660 tree mask_perm_mask = NULL_TREE;
2661 if (known_eq (nunits, gather_off_nunits))
2662 modifier = NONE;
2663 else if (known_eq (nunits * 2, gather_off_nunits))
2665 modifier = WIDEN;
2667 /* Currently widening gathers and scatters are only supported for
2668 fixed-length vectors. */
2669 int count = gather_off_nunits.to_constant ();
2670 vec_perm_builder sel (count, count, 1);
2671 for (int i = 0; i < count; ++i)
2672 sel.quick_push (i | (count / 2));
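/* E.g. for COUNT 8 the selector is { 4, 5, 6, 7, 4, 5, 6, 7 }; odd copies
   reuse the high half of the offset vector.  */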
2674 vec_perm_indices indices (sel, 1, count);
2675 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2676 indices);
2678 else if (known_eq (nunits, gather_off_nunits * 2))
2680 modifier = NARROW;
2682 /* Currently narrowing gathers and scatters are only supported for
2683 fixed-length vectors. */
2684 int count = nunits.to_constant ();
2685 vec_perm_builder sel (count, count, 1);
2686 sel.quick_grow (count);
2687 for (int i = 0; i < count; ++i)
2688 sel[i] = i < count / 2 ? i : i + count / 2;
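/* E.g. for COUNT 8 the selector is { 0, 1, 2, 3, 8, 9, 10, 11 }, which
   concatenates the low halves of two half-width gather results.  */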
2689 vec_perm_indices indices (sel, 2, count);
2690 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2692 ncopies *= 2;
2694 if (mask && masktype == real_masktype)
2696 for (int i = 0; i < count; ++i)
2697 sel[i] = i | (count / 2);
2698 indices.new_vector (sel, 2, count);
2699 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2701 else if (mask)
2702 mask_halftype
2703 = build_same_sized_truth_vector_type (gs_info->offset_vectype);
2705 else
2706 gcc_unreachable ();
2708 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2709 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2711 tree ptr = fold_convert (ptrtype, gs_info->base);
2712 if (!is_gimple_min_invariant (ptr))
2714 gimple_seq seq;
2715 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2716 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2717 gcc_assert (!new_bb);
2720 tree scale = build_int_cst (scaletype, gs_info->scale);
2722 tree vec_oprnd0 = NULL_TREE;
2723 tree vec_mask = NULL_TREE;
2724 tree src_op = NULL_TREE;
2725 tree mask_op = NULL_TREE;
2726 tree prev_res = NULL_TREE;
2727 stmt_vec_info prev_stmt_info = NULL;
2729 if (!mask)
2731 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2732 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2735 for (int j = 0; j < ncopies; ++j)
2737 tree op, var;
2738 if (modifier == WIDEN && (j & 1))
2739 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2740 perm_mask, stmt_info, gsi);
2741 else if (j == 0)
2742 op = vec_oprnd0
2743 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2744 else
2745 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2746 vec_oprnd0);
2748 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2750 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2751 TYPE_VECTOR_SUBPARTS (idxtype)));
2752 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2753 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2754 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2755 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2756 op = var;
2759 if (mask)
2761 if (mask_perm_mask && (j & 1))
2762 mask_op = permute_vec_elements (mask_op, mask_op,
2763 mask_perm_mask, stmt_info, gsi);
2764 else
2766 if (j == 0)
2767 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2768 else if (modifier != NARROW || (j & 1) == 0)
2769 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2770 vec_mask);
2772 mask_op = vec_mask;
2773 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2775 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2776 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2777 gcc_assert (known_eq (sub1, sub2));
2778 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2779 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2780 gassign *new_stmt
2781 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2782 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2783 mask_op = var;
2786 if (modifier == NARROW && masktype != real_masktype)
2788 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2789 gassign *new_stmt
2790 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2791 : VEC_UNPACK_LO_EXPR,
2792 mask_op);
2793 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2794 mask_op = var;
2796 src_op = mask_op;
2799 tree mask_arg = mask_op;
2800 if (masktype != real_masktype)
2802 tree utype, optype = TREE_TYPE (mask_op);
2803 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2804 utype = real_masktype;
2805 else
2806 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2807 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2808 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2809 gassign *new_stmt
2810 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2811 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2812 mask_arg = var;
2813 if (!useless_type_conversion_p (real_masktype, utype))
2815 gcc_assert (TYPE_PRECISION (utype)
2816 <= TYPE_PRECISION (real_masktype));
2817 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2818 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2819 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2820 mask_arg = var;
2822 src_op = build_zero_cst (srctype);
2824 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2825 mask_arg, scale);
2827 stmt_vec_info new_stmt_info;
2828 if (!useless_type_conversion_p (vectype, rettype))
2830 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2831 TYPE_VECTOR_SUBPARTS (rettype)));
2832 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2833 gimple_call_set_lhs (new_call, op);
2834 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2835 var = make_ssa_name (vec_dest);
2836 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2837 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2838 new_stmt_info
2839 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2841 else
2843 var = make_ssa_name (vec_dest, new_call);
2844 gimple_call_set_lhs (new_call, var);
2845 new_stmt_info
2846 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2849 if (modifier == NARROW)
2851 if ((j & 1) == 0)
2853 prev_res = var;
2854 continue;
2856 var = permute_vec_elements (prev_res, var, perm_mask,
2857 stmt_info, gsi);
2858 new_stmt_info = loop_vinfo->lookup_def (var);
2861 if (prev_stmt_info == NULL)
2862 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2863 else
2864 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2865 prev_stmt_info = new_stmt_info;
2869 /* Prepare the base and offset in GS_INFO for vectorization.
2870 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2871 to the vectorized offset argument for the first copy of STMT_INFO.
2872 STMT_INFO is the statement described by GS_INFO and LOOP is the
2873 containing loop. */
2875 static void
2876 vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
2877 gather_scatter_info *gs_info,
2878 tree *dataref_ptr, tree *vec_offset)
2880 gimple_seq stmts = NULL;
2881 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2882 if (stmts != NULL)
2884 basic_block new_bb;
2885 edge pe = loop_preheader_edge (loop);
2886 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2887 gcc_assert (!new_bb);
2889 tree offset_type = TREE_TYPE (gs_info->offset);
2890 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2891 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2892 offset_vectype);
2895 /* Prepare to implement a grouped or strided load or store using
2896 the gather load or scatter store operation described by GS_INFO.
2897 STMT_INFO is the load or store statement.
2899 Set *DATAREF_BUMP to the amount that should be added to the base
2900 address after each copy of the vectorized statement. Set *VEC_OFFSET
2901 to an invariant offset vector in which element I has the value
2902 I * DR_STEP / SCALE. */
2904 static void
2905 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2906 loop_vec_info loop_vinfo,
2907 gather_scatter_info *gs_info,
2908 tree *dataref_bump, tree *vec_offset)
2910 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2911 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2912 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2913 gimple_seq stmts;
2915 tree bump = size_binop (MULT_EXPR,
2916 fold_convert (sizetype, DR_STEP (dr)),
2917 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2918 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2919 if (stmts)
2920 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2922 /* The offset given in GS_INFO can have pointer type, so use the element
2923 type of the vector instead. */
2924 tree offset_type = TREE_TYPE (gs_info->offset);
2925 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2926 offset_type = TREE_TYPE (offset_vectype);
2928 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2929 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2930 ssize_int (gs_info->scale));
2931 step = fold_convert (offset_type, step);
2932 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2934 /* Create {0, X, X*2, X*3, ...}. */
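/* E.g. with DR_STEP 8 and SCALE 4, X is 2 and the series is
   { 0, 2, 4, 6, ... }.  */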
2935 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2936 build_zero_cst (offset_type), step);
2937 if (stmts)
2938 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2941 /* Return the amount that should be added to a vector pointer to move
2942 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2943 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2944 vectorization. */
2946 static tree
2947 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
2948 vect_memory_access_type memory_access_type)
2950 if (memory_access_type == VMAT_INVARIANT)
2951 return size_zero_node;
2953 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2954 tree step = vect_dr_behavior (dr_info)->step;
2955 if (tree_int_cst_sgn (step) == -1)
2956 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2957 return iv_step;
2960 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2962 static bool
2963 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2964 stmt_vec_info *vec_stmt, slp_tree slp_node,
2965 tree vectype_in, stmt_vector_for_cost *cost_vec)
2967 tree op, vectype;
2968 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2969 vec_info *vinfo = stmt_info->vinfo;
2970 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2971 unsigned ncopies;
2973 op = gimple_call_arg (stmt, 0);
2974 vectype = STMT_VINFO_VECTYPE (stmt_info);
2975 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2977 /* Multiple types in SLP are handled by creating the appropriate number of
2978 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2979 case of SLP. */
2980 if (slp_node)
2981 ncopies = 1;
2982 else
2983 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2985 gcc_assert (ncopies >= 1);
2987 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2988 if (! char_vectype)
2989 return false;
2991 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2992 unsigned word_bytes;
2993 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2994 return false;
2996 /* The encoding uses one stepped pattern for each byte in the word. */
2997 vec_perm_builder elts (num_bytes, word_bytes, 3);
2998 for (unsigned i = 0; i < 3; ++i)
2999 for (unsigned j = 0; j < word_bytes; ++j)
3000 elts.quick_push ((i + 1) * word_bytes - j - 1);
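/* E.g. for 4-byte words the encoded indices are
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, which extends to reversing
   the bytes within every word of the vector.  */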
3002 vec_perm_indices indices (elts, 1, num_bytes);
3003 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3004 return false;
3006 if (! vec_stmt)
3008 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3009 DUMP_VECT_SCOPE ("vectorizable_bswap");
3010 if (! slp_node)
3012 record_stmt_cost (cost_vec,
3013 1, vector_stmt, stmt_info, 0, vect_prologue);
3014 record_stmt_cost (cost_vec,
3015 ncopies, vec_perm, stmt_info, 0, vect_body);
3017 return true;
3020 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3022 /* Transform. */
3023 vec<tree> vec_oprnds = vNULL;
3024 stmt_vec_info new_stmt_info = NULL;
3025 stmt_vec_info prev_stmt_info = NULL;
3026 for (unsigned j = 0; j < ncopies; j++)
3028 /* Handle uses. */
3029 if (j == 0)
3030 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3031 else
3032 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3034 /* Arguments are ready. Create the new vector stmt. */
3035 unsigned i;
3036 tree vop;
3037 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3039 gimple *new_stmt;
3040 tree tem = make_ssa_name (char_vectype);
3041 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3042 char_vectype, vop));
3043 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3044 tree tem2 = make_ssa_name (char_vectype);
3045 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3046 tem, tem, bswap_vconst);
3047 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3048 tem = make_ssa_name (vectype);
3049 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3050 vectype, tem2));
3051 new_stmt_info
3052 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3053 if (slp_node)
3054 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3057 if (slp_node)
3058 continue;
3060 if (j == 0)
3061 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3062 else
3063 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3065 prev_stmt_info = new_stmt_info;
3068 vec_oprnds.release ();
3069 return true;
3072 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3073 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3074 in a single step. On success, store the binary pack code in
3075 *CONVERT_CODE. */
3077 static bool
3078 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3079 tree_code *convert_code)
3081 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3082 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3083 return false;
3085 tree_code code;
3086 int multi_step_cvt = 0;
3087 auto_vec <tree, 8> interm_types;
3088 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3089 &code, &multi_step_cvt,
3090 &interm_types)
3091 || multi_step_cvt)
3092 return false;
3094 *convert_code = code;
3095 return true;
3098 /* Function vectorizable_call.
3100 Check if STMT_INFO performs a function call that can be vectorized.
3101 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3102 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3103 Return true if STMT_INFO is vectorizable in this way. */
3105 static bool
3106 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3107 stmt_vec_info *vec_stmt, slp_tree slp_node,
3108 stmt_vector_for_cost *cost_vec)
3110 gcall *stmt;
3111 tree vec_dest;
3112 tree scalar_dest;
3113 tree op;
3114 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3115 stmt_vec_info prev_stmt_info;
3116 tree vectype_out, vectype_in;
3117 poly_uint64 nunits_in;
3118 poly_uint64 nunits_out;
3119 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3120 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3121 vec_info *vinfo = stmt_info->vinfo;
3122 tree fndecl, new_temp, rhs_type;
3123 enum vect_def_type dt[4]
3124 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3125 vect_unknown_def_type };
3126 tree vectypes[ARRAY_SIZE (dt)] = {};
3127 int ndts = ARRAY_SIZE (dt);
3128 int ncopies, j;
3129 auto_vec<tree, 8> vargs;
3130 auto_vec<tree, 8> orig_vargs;
3131 enum { NARROW, NONE, WIDEN } modifier;
3132 size_t i, nargs;
3133 tree lhs;
3135 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3136 return false;
3138 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3139 && ! vec_stmt)
3140 return false;
3142 /* Is STMT_INFO a vectorizable call? */
3143 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3144 if (!stmt)
3145 return false;
3147 if (gimple_call_internal_p (stmt)
3148 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3149 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3150 /* Handled by vectorizable_load and vectorizable_store. */
3151 return false;
3153 if (gimple_call_lhs (stmt) == NULL_TREE
3154 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3155 return false;
3157 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3159 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3161 /* Process function arguments. */
3162 rhs_type = NULL_TREE;
3163 vectype_in = NULL_TREE;
3164 nargs = gimple_call_num_args (stmt);
3166 /* Bail out if the function has more than four arguments; we do not have
3167 interesting builtin functions to vectorize with more than two arguments
3168 except for fma. No arguments is also not good. */
3169 if (nargs == 0 || nargs > 4)
3170 return false;
3172 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
3173 combined_fn cfn = gimple_call_combined_fn (stmt);
3174 if (cfn == CFN_GOMP_SIMD_LANE)
3176 nargs = 0;
3177 rhs_type = unsigned_type_node;
3180 int mask_opno = -1;
3181 if (internal_fn_p (cfn))
3182 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
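/* internal_fn_mask_index returns -1 if the function has no mask
   operand, so MASK_OPNO is -1 for unconditional calls.  */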
3184 for (i = 0; i < nargs; i++)
3186 op = gimple_call_arg (stmt, i);
3187 if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3189 if (dump_enabled_p ())
3190 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3191 "use not simple.\n");
3192 return false;
3195 /* Skip the mask argument to an internal function. This operand
3196 has been converted via a pattern if necessary. */
3197 if ((int) i == mask_opno)
3198 continue;
3200 /* We can only handle calls with arguments of the same type. */
3201 if (rhs_type
3202 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3204 if (dump_enabled_p ())
3205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3206 "argument types differ.\n");
3207 return false;
3209 if (!rhs_type)
3210 rhs_type = TREE_TYPE (op);
3212 if (!vectype_in)
3213 vectype_in = vectypes[i];
3214 else if (vectypes[i]
3215 && vectypes[i] != vectype_in)
3217 if (dump_enabled_p ())
3218 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3219 "argument vector types differ.\n");
3220 return false;
3223 /* If all arguments are external or constant defs, use a vector type with
3224 the same size as the output vector type. */
3225 if (!vectype_in)
3226 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3227 if (vec_stmt)
3228 gcc_assert (vectype_in);
3229 if (!vectype_in)
3231 if (dump_enabled_p ())
3232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3233 "no vectype for scalar type %T\n", rhs_type);
3235 return false;
3238 /* FORNOW */
3239 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3240 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3241 if (known_eq (nunits_in * 2, nunits_out))
3242 modifier = NARROW;
3243 else if (known_eq (nunits_out, nunits_in))
3244 modifier = NONE;
3245 else if (known_eq (nunits_out * 2, nunits_in))
3246 modifier = WIDEN;
3247 else
3248 return false;
3250 /* We only handle functions that do not read or clobber memory. */
3251 if (gimple_vuse (stmt))
3253 if (dump_enabled_p ())
3254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3255 "function reads from or writes to memory.\n");
3256 return false;
3259 /* For now, we only vectorize functions if a target specific builtin
3260 is available. TODO -- in some cases, it might be profitable to
3261 insert the calls for pieces of the vector, in order to be able
3262 to vectorize other operations in the loop. */
3263 fndecl = NULL_TREE;
3264 internal_fn ifn = IFN_LAST;
3265 tree callee = gimple_call_fndecl (stmt);
3267 /* First try using an internal function. */
3268 tree_code convert_code = ERROR_MARK;
3269 if (cfn != CFN_LAST
3270 && (modifier == NONE
3271 || (modifier == NARROW
3272 && simple_integer_narrowing (vectype_out, vectype_in,
3273 &convert_code))))
3274 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3275 vectype_in);
3277 /* If that fails, try asking for a target-specific built-in function. */
3278 if (ifn == IFN_LAST)
3280 if (cfn != CFN_LAST)
3281 fndecl = targetm.vectorize.builtin_vectorized_function
3282 (cfn, vectype_out, vectype_in);
3283 else if (callee)
3284 fndecl = targetm.vectorize.builtin_md_vectorized_function
3285 (callee, vectype_out, vectype_in);
3288 if (ifn == IFN_LAST && !fndecl)
3290 if (cfn == CFN_GOMP_SIMD_LANE
3291 && !slp_node
3292 && loop_vinfo
3293 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3294 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3295 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3296 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3298 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3299 { 0, 1, 2, ... vf - 1 } vector. */
3300 gcc_assert (nargs == 0);
3302 else if (modifier == NONE
3303 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3304 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3305 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3306 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3307 vectype_in, cost_vec);
3308 else
3310 if (dump_enabled_p ())
3311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3312 "function is not vectorizable.\n");
3313 return false;
3317 if (slp_node)
3318 ncopies = 1;
3319 else if (modifier == NARROW && ifn == IFN_LAST)
3320 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3321 else
3322 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3324 /* Sanity check: make sure that at least one copy of the vectorized stmt
3325 needs to be generated. */
3326 gcc_assert (ncopies >= 1);
3328 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3329 if (!vec_stmt) /* transformation not required. */
3331 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3332 DUMP_VECT_SCOPE ("vectorizable_call");
3333 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3334 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3335 record_stmt_cost (cost_vec, ncopies / 2,
3336 vec_promote_demote, stmt_info, 0, vect_body);
3338 if (loop_vinfo && mask_opno >= 0)
3340 unsigned int nvectors = (slp_node
3341 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3342 : ncopies);
3343 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3345 return true;
3348 /* Transform. */
3350 if (dump_enabled_p ())
3351 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3353 /* Handle def. */
3354 scalar_dest = gimple_call_lhs (stmt);
3355 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3357 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3359 stmt_vec_info new_stmt_info = NULL;
3360 prev_stmt_info = NULL;
3361 if (modifier == NONE || ifn != IFN_LAST)
3363 tree prev_res = NULL_TREE;
3364 vargs.safe_grow (nargs);
3365 orig_vargs.safe_grow (nargs);
3366 for (j = 0; j < ncopies; ++j)
3368 /* Build argument list for the vectorized call. */
3369 if (slp_node)
3371 auto_vec<vec<tree> > vec_defs (nargs);
3372 vec<tree> vec_oprnds0;
3374 for (i = 0; i < nargs; i++)
3375 vargs[i] = gimple_call_arg (stmt, i);
3376 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3377 vec_oprnds0 = vec_defs[0];
3379 /* Arguments are ready. Create the new vector stmt. */
3380 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3382 size_t k;
3383 for (k = 0; k < nargs; k++)
3385 vec<tree> vec_oprndsk = vec_defs[k];
3386 vargs[k] = vec_oprndsk[i];
3388 if (modifier == NARROW)
3390 /* We don't define any narrowing conditional functions
3391 at present. */
3392 gcc_assert (mask_opno < 0);
3393 tree half_res = make_ssa_name (vectype_in);
3394 gcall *call
3395 = gimple_build_call_internal_vec (ifn, vargs);
3396 gimple_call_set_lhs (call, half_res);
3397 gimple_call_set_nothrow (call, true);
3398 new_stmt_info
3399 = vect_finish_stmt_generation (stmt_info, call, gsi);
3400 if ((i & 1) == 0)
3402 prev_res = half_res;
3403 continue;
3405 new_temp = make_ssa_name (vec_dest);
3406 gimple *new_stmt
3407 = gimple_build_assign (new_temp, convert_code,
3408 prev_res, half_res);
3409 new_stmt_info
3410 = vect_finish_stmt_generation (stmt_info, new_stmt,
3411 gsi);
3413 else
3415 if (mask_opno >= 0 && masked_loop_p)
3417 unsigned int vec_num = vec_oprnds0.length ();
3418 /* Always true for SLP. */
3419 gcc_assert (ncopies == 1);
3420 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3421 vectype_out, i);
3422 vargs[mask_opno] = prepare_load_store_mask
3423 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3426 gcall *call;
3427 if (ifn != IFN_LAST)
3428 call = gimple_build_call_internal_vec (ifn, vargs);
3429 else
3430 call = gimple_build_call_vec (fndecl, vargs);
3431 new_temp = make_ssa_name (vec_dest, call);
3432 gimple_call_set_lhs (call, new_temp);
3433 gimple_call_set_nothrow (call, true);
3434 new_stmt_info
3435 = vect_finish_stmt_generation (stmt_info, call, gsi);
3437 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3440 for (i = 0; i < nargs; i++)
3442 vec<tree> vec_oprndsi = vec_defs[i];
3443 vec_oprndsi.release ();
3445 continue;
3448 if (mask_opno >= 0 && !vectypes[mask_opno])
3450 gcc_assert (modifier != WIDEN);
3451 vectypes[mask_opno]
3452 = build_same_sized_truth_vector_type (vectype_in);
3455 for (i = 0; i < nargs; i++)
3457 op = gimple_call_arg (stmt, i);
3458 if (j == 0)
3459 vec_oprnd0
3460 = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3461 else
3462 vec_oprnd0
3463 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3465 orig_vargs[i] = vargs[i] = vec_oprnd0;
3468 if (mask_opno >= 0 && masked_loop_p)
3470 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3471 vectype_out, j);
3472 vargs[mask_opno]
3473 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3474 vargs[mask_opno], gsi);
3477 if (cfn == CFN_GOMP_SIMD_LANE)
3479 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3480 tree new_var
3481 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3482 gimple *init_stmt = gimple_build_assign (new_var, cst);
3483 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3484 new_temp = make_ssa_name (vec_dest);
3485 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3486 new_stmt_info
3487 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3489 else if (modifier == NARROW)
3491 /* We don't define any narrowing conditional functions at
3492 present. */
3493 gcc_assert (mask_opno < 0);
3494 tree half_res = make_ssa_name (vectype_in);
3495 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3496 gimple_call_set_lhs (call, half_res);
3497 gimple_call_set_nothrow (call, true);
3498 new_stmt_info
3499 = vect_finish_stmt_generation (stmt_info, call, gsi);
3500 if ((j & 1) == 0)
3502 prev_res = half_res;
3503 continue;
3505 new_temp = make_ssa_name (vec_dest);
3506 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3507 prev_res, half_res);
3508 new_stmt_info
3509 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3511 else
3513 gcall *call;
3514 if (ifn != IFN_LAST)
3515 call = gimple_build_call_internal_vec (ifn, vargs);
3516 else
3517 call = gimple_build_call_vec (fndecl, vargs);
3518 new_temp = make_ssa_name (vec_dest, call);
3519 gimple_call_set_lhs (call, new_temp);
3520 gimple_call_set_nothrow (call, true);
3521 new_stmt_info
3522 = vect_finish_stmt_generation (stmt_info, call, gsi);
3525 if (j == (modifier == NARROW ? 1 : 0))
3526 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3527 else
3528 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3530 prev_stmt_info = new_stmt_info;
3533 else if (modifier == NARROW)
3535 /* We don't define any narrowing conditional functions at present. */
3536 gcc_assert (mask_opno < 0);
3537 for (j = 0; j < ncopies; ++j)
3539 /* Build argument list for the vectorized call. */
3540 if (j == 0)
3541 vargs.create (nargs * 2);
3542 else
3543 vargs.truncate (0);
3545 if (slp_node)
3547 auto_vec<vec<tree> > vec_defs (nargs);
3548 vec<tree> vec_oprnds0;
3550 for (i = 0; i < nargs; i++)
3551 vargs.quick_push (gimple_call_arg (stmt, i));
3552 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3553 vec_oprnds0 = vec_defs[0];
3555 /* Arguments are ready. Create the new vector stmt. */
3556 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3558 size_t k;
3559 vargs.truncate (0);
3560 for (k = 0; k < nargs; k++)
3562 vec<tree> vec_oprndsk = vec_defs[k];
3563 vargs.quick_push (vec_oprndsk[i]);
3564 vargs.quick_push (vec_oprndsk[i + 1]);
3566 gcall *call;
3567 if (ifn != IFN_LAST)
3568 call = gimple_build_call_internal_vec (ifn, vargs);
3569 else
3570 call = gimple_build_call_vec (fndecl, vargs);
3571 new_temp = make_ssa_name (vec_dest, call);
3572 gimple_call_set_lhs (call, new_temp);
3573 gimple_call_set_nothrow (call, true);
3574 new_stmt_info
3575 = vect_finish_stmt_generation (stmt_info, call, gsi);
3576 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3579 for (i = 0; i < nargs; i++)
3581 vec<tree> vec_oprndsi = vec_defs[i];
3582 vec_oprndsi.release ();
3584 continue;
3587 for (i = 0; i < nargs; i++)
3589 op = gimple_call_arg (stmt, i);
3590 if (j == 0)
3592 vec_oprnd0
3593 = vect_get_vec_def_for_operand (op, stmt_info,
3594 vectypes[i]);
3595 vec_oprnd1
3596 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3598 else
3600 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3601 2 * i + 1);
3602 vec_oprnd0
3603 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3604 vec_oprnd1
3605 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3608 vargs.quick_push (vec_oprnd0);
3609 vargs.quick_push (vec_oprnd1);
3612 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3613 new_temp = make_ssa_name (vec_dest, new_stmt);
3614 gimple_call_set_lhs (new_stmt, new_temp);
3615 new_stmt_info
3616 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3618 if (j == 0)
3619 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3620 else
3621 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3623 prev_stmt_info = new_stmt_info;
3626 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3628 else
3629 /* No current target implements this case. */
3630 return false;
3632 vargs.release ();
3634 /* The call in STMT might prevent it from being removed in dce.
3635 We however cannot remove it here, due to the way the ssa name
3636 it defines is mapped to the new definition. So just replace
3637 the rhs of the statement with something harmless. */
3639 if (slp_node)
3640 return true;
3642 stmt_info = vect_orig_stmt (stmt_info);
3643 lhs = gimple_get_lhs (stmt_info->stmt);
3645 gassign *new_stmt
3646 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3647 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3649 return true;
3653 struct simd_call_arg_info
3655 tree vectype;
3656 tree op;
3657 HOST_WIDE_INT linear_step;
3658 enum vect_def_type dt;
3659 unsigned int align;
3660 bool simd_lane_linear;
3663 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3664 is linear within a simd lane (but not within the whole loop), note it in
3665 *ARGINFO. */
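/* For instance (a sketch; the SSA names and the step of 16 are purely
   illustrative), OMP lowering of a privatized variable in a simd loop
   can leave a pointer computed as

       _1 = .GOMP_SIMD_LANE (simduid.0_5);
       _2 = _1 * 16;
       p_3 = &D.1234 + _2;

   in which case P_3 is linear within a simd lane with base &D.1234 and
   linear step 16, even though it is not linear across the whole loop.  */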
3667 static void
3668 vect_simd_lane_linear (tree op, struct loop *loop,
3669 struct simd_call_arg_info *arginfo)
3671 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3673 if (!is_gimple_assign (def_stmt)
3674 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3675 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3676 return;
3678 tree base = gimple_assign_rhs1 (def_stmt);
3679 HOST_WIDE_INT linear_step = 0;
3680 tree v = gimple_assign_rhs2 (def_stmt);
3681 while (TREE_CODE (v) == SSA_NAME)
3683 tree t;
3684 def_stmt = SSA_NAME_DEF_STMT (v);
3685 if (is_gimple_assign (def_stmt))
3686 switch (gimple_assign_rhs_code (def_stmt))
3688 case PLUS_EXPR:
3689 t = gimple_assign_rhs2 (def_stmt);
3690 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3691 return;
3692 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3693 v = gimple_assign_rhs1 (def_stmt);
3694 continue;
3695 case MULT_EXPR:
3696 t = gimple_assign_rhs2 (def_stmt);
3697 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3698 return;
3699 linear_step = tree_to_shwi (t);
3700 v = gimple_assign_rhs1 (def_stmt);
3701 continue;
3702 CASE_CONVERT:
3703 t = gimple_assign_rhs1 (def_stmt);
3704 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3705 || (TYPE_PRECISION (TREE_TYPE (v))
3706 < TYPE_PRECISION (TREE_TYPE (t))))
3707 return;
3708 if (!linear_step)
3709 linear_step = 1;
3710 v = t;
3711 continue;
3712 default:
3713 return;
3715 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3716 && loop->simduid
3717 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3718 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3719 == loop->simduid))
3721 if (!linear_step)
3722 linear_step = 1;
3723 arginfo->linear_step = linear_step;
3724 arginfo->op = base;
3725 arginfo->simd_lane_linear = true;
3726 return;
3731 /* Return the number of elements in vector type VECTYPE, which is associated
3732 with a SIMD clone. At present these vectors always have a constant
3733 length. */
3735 static unsigned HOST_WIDE_INT
3736 simd_clone_subparts (tree vectype)
3738 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3741 /* Function vectorizable_simd_clone_call.
3743 Check if STMT_INFO performs a function call that can be vectorized
3744 by calling a simd clone of the function.
3745 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3746 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3747 Return true if STMT_INFO is vectorizable in this way. */
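/* For illustration (the clone name below follows the usual vector ABI
   mangling but is only an assumption here), a loop such as

       #pragma omp declare simd notinbranch
       float f (float);
       ...
       for (i = 0; i < n; i++)
         a[i] = f (b[i]);

   can have the scalar call replaced by one call to a simd clone per
   vector copy, e.g.  vect__1 = _ZGVbN4v_f (vect_b);  the clone is
   picked from NODE->simd_clones by the badness heuristic below.  */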
3749 static bool
3750 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3751 gimple_stmt_iterator *gsi,
3752 stmt_vec_info *vec_stmt, slp_tree slp_node,
3753 stmt_vector_for_cost *)
3755 tree vec_dest;
3756 tree scalar_dest;
3757 tree op, type;
3758 tree vec_oprnd0 = NULL_TREE;
3759 stmt_vec_info prev_stmt_info;
3760 tree vectype;
3761 unsigned int nunits;
3762 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3763 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3764 vec_info *vinfo = stmt_info->vinfo;
3765 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3766 tree fndecl, new_temp;
3767 int ncopies, j;
3768 auto_vec<simd_call_arg_info> arginfo;
3769 vec<tree> vargs = vNULL;
3770 size_t i, nargs;
3771 tree lhs, rtype, ratype;
3772 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3774 /* Is STMT a vectorizable call? */
3775 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3776 if (!stmt)
3777 return false;
3779 fndecl = gimple_call_fndecl (stmt);
3780 if (fndecl == NULL_TREE)
3781 return false;
3783 struct cgraph_node *node = cgraph_node::get (fndecl);
3784 if (node == NULL || node->simd_clones == NULL)
3785 return false;
3787 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3788 return false;
3790 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3791 && ! vec_stmt)
3792 return false;
3794 if (gimple_call_lhs (stmt)
3795 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3796 return false;
3798 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3800 vectype = STMT_VINFO_VECTYPE (stmt_info);
3802 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3803 return false;
3805 /* FORNOW */
3806 if (slp_node)
3807 return false;
3809 /* Process function arguments. */
3810 nargs = gimple_call_num_args (stmt);
3812 /* Bail out if the function has zero arguments. */
3813 if (nargs == 0)
3814 return false;
3816 arginfo.reserve (nargs, true);
3818 for (i = 0; i < nargs; i++)
3820 simd_call_arg_info thisarginfo;
3821 affine_iv iv;
3823 thisarginfo.linear_step = 0;
3824 thisarginfo.align = 0;
3825 thisarginfo.op = NULL_TREE;
3826 thisarginfo.simd_lane_linear = false;
3828 op = gimple_call_arg (stmt, i);
3829 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3830 &thisarginfo.vectype)
3831 || thisarginfo.dt == vect_uninitialized_def)
3833 if (dump_enabled_p ())
3834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3835 "use not simple.\n");
3836 return false;
3839 if (thisarginfo.dt == vect_constant_def
3840 || thisarginfo.dt == vect_external_def)
3841 gcc_assert (thisarginfo.vectype == NULL_TREE);
3842 else
3843 gcc_assert (thisarginfo.vectype != NULL_TREE);
3845 /* For linear arguments, the analysis phase should have saved
3846 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
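/* The layout used here and by the code that fills the vector during
   analysis is: entry 0 holds the selected clone's decl, and for each
   linear argument I, entries 3*I+1, 3*I+2 and 3*I+3 hold the base,
   the step and a boolean saying whether the argument is linear only
   within a simd lane.  */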
3847 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3848 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3850 gcc_assert (vec_stmt);
3851 thisarginfo.linear_step
3852 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3853 thisarginfo.op
3854 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3855 thisarginfo.simd_lane_linear
3856 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3857 == boolean_true_node);
3858 /* If loop has been peeled for alignment, we need to adjust it. */
3859 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3860 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3861 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3863 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3864 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3865 tree opt = TREE_TYPE (thisarginfo.op);
3866 bias = fold_convert (TREE_TYPE (step), bias);
3867 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3868 thisarginfo.op
3869 = fold_build2 (POINTER_TYPE_P (opt)
3870 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3871 thisarginfo.op, bias);
3874 else if (!vec_stmt
3875 && thisarginfo.dt != vect_constant_def
3876 && thisarginfo.dt != vect_external_def
3877 && loop_vinfo
3878 && TREE_CODE (op) == SSA_NAME
3879 && simple_iv (loop, loop_containing_stmt (stmt), op,
3880 &iv, false)
3881 && tree_fits_shwi_p (iv.step))
3883 thisarginfo.linear_step = tree_to_shwi (iv.step);
3884 thisarginfo.op = iv.base;
3886 else if ((thisarginfo.dt == vect_constant_def
3887 || thisarginfo.dt == vect_external_def)
3888 && POINTER_TYPE_P (TREE_TYPE (op)))
3889 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3890 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3891 linear too. */
3892 if (POINTER_TYPE_P (TREE_TYPE (op))
3893 && !thisarginfo.linear_step
3894 && !vec_stmt
3895 && thisarginfo.dt != vect_constant_def
3896 && thisarginfo.dt != vect_external_def
3897 && loop_vinfo
3898 && !slp_node
3899 && TREE_CODE (op) == SSA_NAME)
3900 vect_simd_lane_linear (op, loop, &thisarginfo);
3902 arginfo.quick_push (thisarginfo);
3905 unsigned HOST_WIDE_INT vf;
3906 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3908 if (dump_enabled_p ())
3909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3910 "not considering SIMD clones; not yet supported"
3911 " for variable-width vectors.\n");
3912 return false;
3915 unsigned int badness = 0;
3916 struct cgraph_node *bestn = NULL;
3917 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3918 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3919 else
3920 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3921 n = n->simdclone->next_clone)
3923 unsigned int this_badness = 0;
3924 if (n->simdclone->simdlen > vf
3925 || n->simdclone->nargs != nargs)
3926 continue;
3927 if (n->simdclone->simdlen < vf)
3928 this_badness += (exact_log2 (vf)
3929 - exact_log2 (n->simdclone->simdlen)) * 1024;
3930 if (n->simdclone->inbranch)
3931 this_badness += 2048;
3932 int target_badness = targetm.simd_clone.usable (n);
3933 if (target_badness < 0)
3934 continue;
3935 this_badness += target_badness * 512;
3936 /* FORNOW: we still have to add code to pass the mask argument. */
3937 if (n->simdclone->inbranch)
3938 continue;
3939 for (i = 0; i < nargs; i++)
3941 switch (n->simdclone->args[i].arg_type)
3943 case SIMD_CLONE_ARG_TYPE_VECTOR:
3944 if (!useless_type_conversion_p
3945 (n->simdclone->args[i].orig_type,
3946 TREE_TYPE (gimple_call_arg (stmt, i))))
3947 i = -1;
3948 else if (arginfo[i].dt == vect_constant_def
3949 || arginfo[i].dt == vect_external_def
3950 || arginfo[i].linear_step)
3951 this_badness += 64;
3952 break;
3953 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3954 if (arginfo[i].dt != vect_constant_def
3955 && arginfo[i].dt != vect_external_def)
3956 i = -1;
3957 break;
3958 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3959 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3960 if (arginfo[i].dt == vect_constant_def
3961 || arginfo[i].dt == vect_external_def
3962 || (arginfo[i].linear_step
3963 != n->simdclone->args[i].linear_step))
3964 i = -1;
3965 break;
3966 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3967 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3968 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3969 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3970 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3971 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3972 /* FORNOW */
3973 i = -1;
3974 break;
3975 case SIMD_CLONE_ARG_TYPE_MASK:
3976 gcc_unreachable ();
3978 if (i == (size_t) -1)
3979 break;
3980 if (n->simdclone->args[i].alignment > arginfo[i].align)
3982 i = -1;
3983 break;
3985 if (arginfo[i].align)
3986 this_badness += (exact_log2 (arginfo[i].align)
3987 - exact_log2 (n->simdclone->args[i].alignment));
3989 if (i == (size_t) -1)
3990 continue;
3991 if (bestn == NULL || this_badness < badness)
3993 bestn = n;
3994 badness = this_badness;
3998 if (bestn == NULL)
3999 return false;
4001 for (i = 0; i < nargs; i++)
4002 if ((arginfo[i].dt == vect_constant_def
4003 || arginfo[i].dt == vect_external_def)
4004 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4006 arginfo[i].vectype
4007 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
4008 i)));
4009 if (arginfo[i].vectype == NULL
4010 || (simd_clone_subparts (arginfo[i].vectype)
4011 > bestn->simdclone->simdlen))
4012 return false;
4015 fndecl = bestn->decl;
4016 nunits = bestn->simdclone->simdlen;
4017 ncopies = vf / nunits;
4019 /* If the function isn't const, only allow it in simd loops where the user
4020 has asserted that at least nunits consecutive iterations can be
4021 performed using SIMD instructions. */
4022 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4023 && gimple_vuse (stmt))
4024 return false;
4026 /* Sanity check: make sure that at least one copy of the vectorized stmt
4027 needs to be generated. */
4028 gcc_assert (ncopies >= 1);
4030 if (!vec_stmt) /* transformation not required. */
4032 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4033 for (i = 0; i < nargs; i++)
4034 if ((bestn->simdclone->args[i].arg_type
4035 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4036 || (bestn->simdclone->args[i].arg_type
4037 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4039 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4040 + 1);
4041 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4042 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4043 ? size_type_node : TREE_TYPE (arginfo[i].op);
4044 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4045 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4046 tree sll = arginfo[i].simd_lane_linear
4047 ? boolean_true_node : boolean_false_node;
4048 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4050 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4051 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4052 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4053 return true;
4056 /* Transform. */
4058 if (dump_enabled_p ())
4059 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4061 /* Handle def. */
4062 scalar_dest = gimple_call_lhs (stmt);
4063 vec_dest = NULL_TREE;
4064 rtype = NULL_TREE;
4065 ratype = NULL_TREE;
4066 if (scalar_dest)
4068 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4069 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4070 if (TREE_CODE (rtype) == ARRAY_TYPE)
4072 ratype = rtype;
4073 rtype = TREE_TYPE (ratype);
4077 prev_stmt_info = NULL;
4078 for (j = 0; j < ncopies; ++j)
4080 /* Build argument list for the vectorized call. */
4081 if (j == 0)
4082 vargs.create (nargs);
4083 else
4084 vargs.truncate (0);
4086 for (i = 0; i < nargs; i++)
4088 unsigned int k, l, m, o;
4089 tree atype;
4090 op = gimple_call_arg (stmt, i);
4091 switch (bestn->simdclone->args[i].arg_type)
4093 case SIMD_CLONE_ARG_TYPE_VECTOR:
4094 atype = bestn->simdclone->args[i].vector_type;
4095 o = nunits / simd_clone_subparts (atype);
4096 for (m = j * o; m < (j + 1) * o; m++)
4098 if (simd_clone_subparts (atype)
4099 < simd_clone_subparts (arginfo[i].vectype))
4101 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4102 k = (simd_clone_subparts (arginfo[i].vectype)
4103 / simd_clone_subparts (atype));
4104 gcc_assert ((k & (k - 1)) == 0);
4105 if (m == 0)
4106 vec_oprnd0
4107 = vect_get_vec_def_for_operand (op, stmt_info);
4108 else
4110 vec_oprnd0 = arginfo[i].op;
4111 if ((m & (k - 1)) == 0)
4112 vec_oprnd0
4113 = vect_get_vec_def_for_stmt_copy (vinfo,
4114 vec_oprnd0);
4116 arginfo[i].op = vec_oprnd0;
4117 vec_oprnd0
4118 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4119 bitsize_int (prec),
4120 bitsize_int ((m & (k - 1)) * prec));
4121 gassign *new_stmt
4122 = gimple_build_assign (make_ssa_name (atype),
4123 vec_oprnd0);
4124 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4125 vargs.safe_push (gimple_assign_lhs (new_stmt));
4127 else
4129 k = (simd_clone_subparts (atype)
4130 / simd_clone_subparts (arginfo[i].vectype));
4131 gcc_assert ((k & (k - 1)) == 0);
4132 vec<constructor_elt, va_gc> *ctor_elts;
4133 if (k != 1)
4134 vec_alloc (ctor_elts, k);
4135 else
4136 ctor_elts = NULL;
4137 for (l = 0; l < k; l++)
4139 if (m == 0 && l == 0)
4140 vec_oprnd0
4141 = vect_get_vec_def_for_operand (op, stmt_info);
4142 else
4143 vec_oprnd0
4144 = vect_get_vec_def_for_stmt_copy (vinfo,
4145 arginfo[i].op);
4146 arginfo[i].op = vec_oprnd0;
4147 if (k == 1)
4148 break;
4149 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4150 vec_oprnd0);
4152 if (k == 1)
4153 vargs.safe_push (vec_oprnd0);
4154 else
4156 vec_oprnd0 = build_constructor (atype, ctor_elts);
4157 gassign *new_stmt
4158 = gimple_build_assign (make_ssa_name (atype),
4159 vec_oprnd0);
4160 vect_finish_stmt_generation (stmt_info, new_stmt,
4161 gsi);
4162 vargs.safe_push (gimple_assign_lhs (new_stmt));
4166 break;
4167 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4168 vargs.safe_push (op);
4169 break;
4170 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4171 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4172 if (j == 0)
4174 gimple_seq stmts;
4175 arginfo[i].op
4176 = force_gimple_operand (arginfo[i].op, &stmts, true,
4177 NULL_TREE);
4178 if (stmts != NULL)
4180 basic_block new_bb;
4181 edge pe = loop_preheader_edge (loop);
4182 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4183 gcc_assert (!new_bb);
4185 if (arginfo[i].simd_lane_linear)
4187 vargs.safe_push (arginfo[i].op);
4188 break;
4190 tree phi_res = copy_ssa_name (op);
4191 gphi *new_phi = create_phi_node (phi_res, loop->header);
4192 loop_vinfo->add_stmt (new_phi);
4193 add_phi_arg (new_phi, arginfo[i].op,
4194 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4195 enum tree_code code
4196 = POINTER_TYPE_P (TREE_TYPE (op))
4197 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4198 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4199 ? sizetype : TREE_TYPE (op);
4200 widest_int cst
4201 = wi::mul (bestn->simdclone->args[i].linear_step,
4202 ncopies * nunits);
4203 tree tcst = wide_int_to_tree (type, cst);
4204 tree phi_arg = copy_ssa_name (op);
4205 gassign *new_stmt
4206 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4207 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4208 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4209 loop_vinfo->add_stmt (new_stmt);
4210 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4211 UNKNOWN_LOCATION);
4212 arginfo[i].op = phi_res;
4213 vargs.safe_push (phi_res);
4215 else
4217 enum tree_code code
4218 = POINTER_TYPE_P (TREE_TYPE (op))
4219 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4220 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4221 ? sizetype : TREE_TYPE (op);
4222 widest_int cst
4223 = wi::mul (bestn->simdclone->args[i].linear_step,
4224 j * nunits);
4225 tree tcst = wide_int_to_tree (type, cst);
4226 new_temp = make_ssa_name (TREE_TYPE (op));
4227 gassign *new_stmt
4228 = gimple_build_assign (new_temp, code,
4229 arginfo[i].op, tcst);
4230 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4231 vargs.safe_push (new_temp);
4233 break;
4234 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4235 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4236 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4237 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4238 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4239 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4240 default:
4241 gcc_unreachable ();
4245 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4246 if (vec_dest)
4248 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4249 if (ratype)
4250 new_temp = create_tmp_var (ratype);
4251 else if (simd_clone_subparts (vectype)
4252 == simd_clone_subparts (rtype))
4253 new_temp = make_ssa_name (vec_dest, new_call);
4254 else
4255 new_temp = make_ssa_name (rtype, new_call);
4256 gimple_call_set_lhs (new_call, new_temp);
4258 stmt_vec_info new_stmt_info
4259 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4261 if (vec_dest)
4263 if (simd_clone_subparts (vectype) < nunits)
4265 unsigned int k, l;
4266 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4267 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4268 k = nunits / simd_clone_subparts (vectype);
4269 gcc_assert ((k & (k - 1)) == 0);
4270 for (l = 0; l < k; l++)
4272 tree t;
4273 if (ratype)
4275 t = build_fold_addr_expr (new_temp);
4276 t = build2 (MEM_REF, vectype, t,
4277 build_int_cst (TREE_TYPE (t), l * bytes));
4279 else
4280 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4281 bitsize_int (prec), bitsize_int (l * prec));
4282 gimple *new_stmt
4283 = gimple_build_assign (make_ssa_name (vectype), t);
4284 new_stmt_info
4285 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4287 if (j == 0 && l == 0)
4288 STMT_VINFO_VEC_STMT (stmt_info)
4289 = *vec_stmt = new_stmt_info;
4290 else
4291 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4293 prev_stmt_info = new_stmt_info;
4296 if (ratype)
4297 vect_clobber_variable (stmt_info, gsi, new_temp);
4298 continue;
4300 else if (simd_clone_subparts (vectype) > nunits)
4302 unsigned int k = (simd_clone_subparts (vectype)
4303 / simd_clone_subparts (rtype));
4304 gcc_assert ((k & (k - 1)) == 0);
4305 if ((j & (k - 1)) == 0)
4306 vec_alloc (ret_ctor_elts, k);
4307 if (ratype)
4309 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4310 for (m = 0; m < o; m++)
4312 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4313 size_int (m), NULL_TREE, NULL_TREE);
4314 gimple *new_stmt
4315 = gimple_build_assign (make_ssa_name (rtype), tem);
4316 new_stmt_info
4317 = vect_finish_stmt_generation (stmt_info, new_stmt,
4318 gsi);
4319 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4320 gimple_assign_lhs (new_stmt));
4322 vect_clobber_variable (stmt_info, gsi, new_temp);
4324 else
4325 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4326 if ((j & (k - 1)) != k - 1)
4327 continue;
4328 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4329 gimple *new_stmt
4330 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4331 new_stmt_info
4332 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4334 if ((unsigned) j == k - 1)
4335 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4336 else
4337 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4339 prev_stmt_info = new_stmt_info;
4340 continue;
4342 else if (ratype)
4344 tree t = build_fold_addr_expr (new_temp);
4345 t = build2 (MEM_REF, vectype, t,
4346 build_int_cst (TREE_TYPE (t), 0));
4347 gimple *new_stmt
4348 = gimple_build_assign (make_ssa_name (vec_dest), t);
4349 new_stmt_info
4350 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4351 vect_clobber_variable (stmt_info, gsi, new_temp);
4355 if (j == 0)
4356 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4357 else
4358 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4360 prev_stmt_info = new_stmt_info;
4363 vargs.release ();
4365 /* The call in STMT might prevent it from being removed in dce.
4366 We however cannot remove it here, due to the way the ssa name
4367 it defines is mapped to the new definition. So just replace
4368 the rhs of the statement with something harmless. */
4370 if (slp_node)
4371 return true;
4373 gimple *new_stmt;
4374 if (scalar_dest)
4376 type = TREE_TYPE (scalar_dest);
4377 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4378 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4380 else
4381 new_stmt = gimple_build_nop ();
4382 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4383 unlink_stmt_vdef (stmt);
4385 return true;
4389 /* Function vect_gen_widened_results_half
4391 Create a vector stmt whose code, operand arity and result variable are
4392 CODE, OP_TYPE and VEC_DEST, and whose arguments are VEC_OPRND0 and
4393 VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
4394 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4395 needs to be created (DECL is a function-decl of a target-builtin).
4396 STMT_INFO is the original scalar stmt that we are vectorizing. */
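/* For example (a sketch), widening a V8HI multiplication into V4SI
   results is done by calling this function twice, once with
   VEC_WIDEN_MULT_LO_EXPR and once with VEC_WIDEN_MULT_HI_EXPR, so that
   the two half-size results together cover all eight input lanes.  */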
4398 static gimple *
4399 vect_gen_widened_results_half (enum tree_code code,
4400 tree decl,
4401 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4402 tree vec_dest, gimple_stmt_iterator *gsi,
4403 stmt_vec_info stmt_info)
4405 gimple *new_stmt;
4406 tree new_temp;
4408 /* Generate half of the widened result: */
4409 if (code == CALL_EXPR)
4411 /* Target specific support */
4412 if (op_type == binary_op)
4413 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4414 else
4415 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4416 new_temp = make_ssa_name (vec_dest, new_stmt);
4417 gimple_call_set_lhs (new_stmt, new_temp);
4419 else
4421 /* Generic support */
4422 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4423 if (op_type != binary_op)
4424 vec_oprnd1 = NULL;
4425 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4426 new_temp = make_ssa_name (vec_dest, new_stmt);
4427 gimple_assign_set_lhs (new_stmt, new_temp);
4429 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4431 return new_stmt;
4435 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4436 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4437 containing scalar operand), and for the rest we get a copy with
4438 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4439 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4440 The vectors are collected into VEC_OPRNDS. */
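/* Each call pushes a pair of defs and then recurses MULTI_STEP_CVT
   more times, so 2 * (MULTI_STEP_CVT + 1) defs are collected in total;
   the narrowing caller passes vect_pow2 (steps) - 1 so that enough
   input vectors are available for the whole pack chain below.  */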
4442 static void
4443 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4444 vec<tree> *vec_oprnds, int multi_step_cvt)
4446 vec_info *vinfo = stmt_info->vinfo;
4447 tree vec_oprnd;
4449 /* Get first vector operand. */
4450 /* All the vector operands except the very first one (which is the scalar operand)
4451 are stmt copies. */
4452 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4453 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4454 else
4455 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4457 vec_oprnds->quick_push (vec_oprnd);
4459 /* Get second vector operand. */
4460 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4461 vec_oprnds->quick_push (vec_oprnd);
4463 *oprnd = vec_oprnd;
4465 /* For conversion in multiple steps, continue to get operands
4466 recursively. */
4467 if (multi_step_cvt)
4468 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4469 multi_step_cvt - 1);
4473 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4474 For multi-step conversions store the resulting vectors and call the function
4475 recursively. */
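/* For example (a sketch), narrowing four V4SI vectors down to one
   V16QI vector takes two rounds of pairwise packing:

       v8hi_0  = VEC_PACK_TRUNC_EXPR <v4si_0, v4si_1>;
       v8hi_1  = VEC_PACK_TRUNC_EXPR <v4si_2, v4si_3>;
       v16qi_0 = VEC_PACK_TRUNC_EXPR <v8hi_0, v8hi_1>;

   the first round uses CODE and the recursive rounds use
   VEC_PACK_TRUNC_EXPR, as in the recursive call below.  */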
4477 static void
4478 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4479 int multi_step_cvt,
4480 stmt_vec_info stmt_info,
4481 vec<tree> vec_dsts,
4482 gimple_stmt_iterator *gsi,
4483 slp_tree slp_node, enum tree_code code,
4484 stmt_vec_info *prev_stmt_info)
4486 unsigned int i;
4487 tree vop0, vop1, new_tmp, vec_dest;
4489 vec_dest = vec_dsts.pop ();
4491 for (i = 0; i < vec_oprnds->length (); i += 2)
4493 /* Create demotion operation. */
4494 vop0 = (*vec_oprnds)[i];
4495 vop1 = (*vec_oprnds)[i + 1];
4496 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4497 new_tmp = make_ssa_name (vec_dest, new_stmt);
4498 gimple_assign_set_lhs (new_stmt, new_tmp);
4499 stmt_vec_info new_stmt_info
4500 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4502 if (multi_step_cvt)
4503 /* Store the resulting vector for next recursive call. */
4504 (*vec_oprnds)[i/2] = new_tmp;
4505 else
4507 /* This is the last step of the conversion sequence. Store the
4508 vectors in SLP_NODE or in vector info of the scalar statement
4509 (or in STMT_VINFO_RELATED_STMT chain). */
4510 if (slp_node)
4511 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4512 else
4514 if (!*prev_stmt_info)
4515 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4516 else
4517 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4519 *prev_stmt_info = new_stmt_info;
4524 /* For multi-step demotion operations we first generate demotion operations
4525 from the source type to the intermediate types, and then combine the
4526 results (stored in VEC_OPRNDS) in demotion operation to the destination
4527 type. */
4528 if (multi_step_cvt)
4530 /* At each level of recursion we have half of the operands we had at the
4531 previous level. */
4532 vec_oprnds->truncate ((i+1)/2);
4533 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4534 stmt_info, vec_dsts, gsi,
4535 slp_node, VEC_PACK_TRUNC_EXPR,
4536 prev_stmt_info);
4539 vec_dsts.quick_push (vec_dest);
4543 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4544 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4545 STMT_INFO. For multi-step conversions store the resulting vectors and
4546 call the function recursively. */
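/* E.g. (a sketch) each input vector VOP0 produces two widened result
   vectors, one via CODE1 for the "low" half of the lanes and one via
   CODE2 for the "high" half, so VEC_OPRNDS0 doubles in length on each
   invocation; for a multi-step promotion the caller simply invokes
   this function once per step with the appropriate pair of codes.  */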
4548 static void
4549 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4550 vec<tree> *vec_oprnds1,
4551 stmt_vec_info stmt_info, tree vec_dest,
4552 gimple_stmt_iterator *gsi,
4553 enum tree_code code1,
4554 enum tree_code code2, tree decl1,
4555 tree decl2, int op_type)
4557 int i;
4558 tree vop0, vop1, new_tmp1, new_tmp2;
4559 gimple *new_stmt1, *new_stmt2;
4560 vec<tree> vec_tmp = vNULL;
4562 vec_tmp.create (vec_oprnds0->length () * 2);
4563 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4565 if (op_type == binary_op)
4566 vop1 = (*vec_oprnds1)[i];
4567 else
4568 vop1 = NULL_TREE;
4570 /* Generate the two halves of promotion operation. */
4571 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4572 op_type, vec_dest, gsi,
4573 stmt_info);
4574 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4575 op_type, vec_dest, gsi,
4576 stmt_info);
4577 if (is_gimple_call (new_stmt1))
4579 new_tmp1 = gimple_call_lhs (new_stmt1);
4580 new_tmp2 = gimple_call_lhs (new_stmt2);
4582 else
4584 new_tmp1 = gimple_assign_lhs (new_stmt1);
4585 new_tmp2 = gimple_assign_lhs (new_stmt2);
4588 /* Store the results for the next step. */
4589 vec_tmp.quick_push (new_tmp1);
4590 vec_tmp.quick_push (new_tmp2);
4593 vec_oprnds0->release ();
4594 *vec_oprnds0 = vec_tmp;
4598 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4599 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4600 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4601 Return true if STMT_INFO is vectorizable in this way. */
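/* For illustration, a conversion such as

       for (i = 0; i < n; i++)
         d[i] = (double) s[i];

   where S is an array of shorts is a WIDEN case: it may need an
   intermediate integer promotion (short -> int) followed by a widening
   FLOAT_EXPR, which is exactly the multi-step arrangement the code
   below looks for.  */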
4603 static bool
4604 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4605 stmt_vec_info *vec_stmt, slp_tree slp_node,
4606 stmt_vector_for_cost *cost_vec)
4608 tree vec_dest;
4609 tree scalar_dest;
4610 tree op0, op1 = NULL_TREE;
4611 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4612 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4613 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4614 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4615 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4616 tree new_temp;
4617 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4618 int ndts = 2;
4619 stmt_vec_info prev_stmt_info;
4620 poly_uint64 nunits_in;
4621 poly_uint64 nunits_out;
4622 tree vectype_out, vectype_in;
4623 int ncopies, i, j;
4624 tree lhs_type, rhs_type;
4625 enum { NARROW, NONE, WIDEN } modifier;
4626 vec<tree> vec_oprnds0 = vNULL;
4627 vec<tree> vec_oprnds1 = vNULL;
4628 tree vop0;
4629 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4630 vec_info *vinfo = stmt_info->vinfo;
4631 int multi_step_cvt = 0;
4632 vec<tree> interm_types = vNULL;
4633 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4634 int op_type;
4635 unsigned short fltsz;
4637 /* Is STMT a vectorizable conversion? */
4639 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4640 return false;
4642 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4643 && ! vec_stmt)
4644 return false;
4646 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4647 if (!stmt)
4648 return false;
4650 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4651 return false;
4653 code = gimple_assign_rhs_code (stmt);
4654 if (!CONVERT_EXPR_CODE_P (code)
4655 && code != FIX_TRUNC_EXPR
4656 && code != FLOAT_EXPR
4657 && code != WIDEN_MULT_EXPR
4658 && code != WIDEN_LSHIFT_EXPR)
4659 return false;
4661 op_type = TREE_CODE_LENGTH (code);
4663 /* Check types of lhs and rhs. */
4664 scalar_dest = gimple_assign_lhs (stmt);
4665 lhs_type = TREE_TYPE (scalar_dest);
4666 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4668 op0 = gimple_assign_rhs1 (stmt);
4669 rhs_type = TREE_TYPE (op0);
4671 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4672 && !((INTEGRAL_TYPE_P (lhs_type)
4673 && INTEGRAL_TYPE_P (rhs_type))
4674 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4675 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4676 return false;
4678 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4679 && ((INTEGRAL_TYPE_P (lhs_type)
4680 && !type_has_mode_precision_p (lhs_type))
4681 || (INTEGRAL_TYPE_P (rhs_type)
4682 && !type_has_mode_precision_p (rhs_type))))
4684 if (dump_enabled_p ())
4685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4686 "type conversion to/from bit-precision unsupported."
4687 "\n");
4688 return false;
4691 /* Check the operands of the operation. */
4692 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4694 if (dump_enabled_p ())
4695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4696 "use not simple.\n");
4697 return false;
4699 if (op_type == binary_op)
4701 bool ok;
4703 op1 = gimple_assign_rhs2 (stmt);
4704 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4705 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4706 OP1. */
4707 if (CONSTANT_CLASS_P (op0))
4708 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4709 else
4710 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4712 if (!ok)
4714 if (dump_enabled_p ())
4715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4716 "use not simple.\n");
4717 return false;
4721 /* If op0 is an external or constant def, use a vector type of
4722 the same size as the output vector type. */
4723 if (!vectype_in)
4724 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4725 if (vec_stmt)
4726 gcc_assert (vectype_in);
4727 if (!vectype_in)
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4731 "no vectype for scalar type %T\n", rhs_type);
4733 return false;
4736 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4737 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4739 if (dump_enabled_p ())
4740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4741 "can't convert between boolean and non "
4742 "boolean vectors %T\n", rhs_type);
4744 return false;
4747 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4748 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4749 if (known_eq (nunits_out, nunits_in))
4750 modifier = NONE;
4751 else if (multiple_p (nunits_out, nunits_in))
4752 modifier = NARROW;
4753 else
4755 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4756 modifier = WIDEN;
4759 /* Multiple types in SLP are handled by creating the appropriate number of
4760 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4761 case of SLP. */
4762 if (slp_node)
4763 ncopies = 1;
4764 else if (modifier == NARROW)
4765 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4766 else
4767 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4769 /* Sanity check: make sure that at least one copy of the vectorized stmt
4770 needs to be generated. */
4771 gcc_assert (ncopies >= 1);
4773 bool found_mode = false;
4774 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4775 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4776 opt_scalar_mode rhs_mode_iter;
4778 /* Supportable by target? */
4779 switch (modifier)
4781 case NONE:
4782 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4783 return false;
4784 if (supportable_convert_operation (code, vectype_out, vectype_in,
4785 &decl1, &code1))
4786 break;
4787 /* FALLTHRU */
4788 unsupported:
4789 if (dump_enabled_p ())
4790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4791 "conversion not supported by target.\n");
4792 return false;
4794 case WIDEN:
4795 if (supportable_widening_operation (code, stmt_info, vectype_out,
4796 vectype_in, &code1, &code2,
4797 &multi_step_cvt, &interm_types))
4799 /* Binary widening operation can only be supported directly by the
4800 architecture. */
4801 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4802 break;
4805 if (code != FLOAT_EXPR
4806 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4807 goto unsupported;
4809 fltsz = GET_MODE_SIZE (lhs_mode);
4810 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4812 rhs_mode = rhs_mode_iter.require ();
4813 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4814 break;
4816 cvt_type
4817 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4818 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4819 if (cvt_type == NULL_TREE)
4820 goto unsupported;
4822 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4824 if (!supportable_convert_operation (code, vectype_out,
4825 cvt_type, &decl1, &codecvt1))
4826 goto unsupported;
4828 else if (!supportable_widening_operation (code, stmt_info,
4829 vectype_out, cvt_type,
4830 &codecvt1, &codecvt2,
4831 &multi_step_cvt,
4832 &interm_types))
4833 continue;
4834 else
4835 gcc_assert (multi_step_cvt == 0);
4837 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4838 vectype_in, &code1, &code2,
4839 &multi_step_cvt, &interm_types))
4841 found_mode = true;
4842 break;
4846 if (!found_mode)
4847 goto unsupported;
4849 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4850 codecvt2 = ERROR_MARK;
4851 else
4853 multi_step_cvt++;
4854 interm_types.safe_push (cvt_type);
4855 cvt_type = NULL_TREE;
4857 break;
4859 case NARROW:
4860 gcc_assert (op_type == unary_op);
4861 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4862 &code1, &multi_step_cvt,
4863 &interm_types))
4864 break;
4866 if (code != FIX_TRUNC_EXPR
4867 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4868 goto unsupported;
4870 cvt_type
4871 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4872 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4873 if (cvt_type == NULL_TREE)
4874 goto unsupported;
4875 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4876 &decl1, &codecvt1))
4877 goto unsupported;
4878 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4879 &code1, &multi_step_cvt,
4880 &interm_types))
4881 break;
4882 goto unsupported;
4884 default:
4885 gcc_unreachable ();
4888 if (!vec_stmt) /* transformation not required. */
4890 DUMP_VECT_SCOPE ("vectorizable_conversion");
4891 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4893 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4894 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4895 cost_vec);
4897 else if (modifier == NARROW)
4899 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4900 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4901 cost_vec);
4903 else
4905 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4906 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4907 cost_vec);
4909 interm_types.release ();
4910 return true;
4913 /* Transform. */
4914 if (dump_enabled_p ())
4915 dump_printf_loc (MSG_NOTE, vect_location,
4916 "transform conversion. ncopies = %d.\n", ncopies);
4918 if (op_type == binary_op)
4920 if (CONSTANT_CLASS_P (op0))
4921 op0 = fold_convert (TREE_TYPE (op1), op0);
4922 else if (CONSTANT_CLASS_P (op1))
4923 op1 = fold_convert (TREE_TYPE (op0), op1);
4926 /* In case of multi-step conversion, we first generate conversion operations
4927 to the intermediate types, and then from those types to the final one.
4928 We create vector destinations for the intermediate type (TYPES) received
4929 from supportable_*_operation, and store them in the correct order
4930 for future use in vect_create_vectorized_*_stmts (). */
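/* E.g. (a sketch) for a multi-step widening char -> int, one
   destination is created for the intermediate short vector type and
   one for the final int vector type; the loop over MULTI_STEP_CVT in
   the WIDEN case below then consumes the intermediate destination
   first and the final one last.  */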
4931 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4932 vec_dest = vect_create_destination_var (scalar_dest,
4933 (cvt_type && modifier == WIDEN)
4934 ? cvt_type : vectype_out);
4935 vec_dsts.quick_push (vec_dest);
4937 if (multi_step_cvt)
4939 for (i = interm_types.length () - 1;
4940 interm_types.iterate (i, &intermediate_type); i--)
4942 vec_dest = vect_create_destination_var (scalar_dest,
4943 intermediate_type);
4944 vec_dsts.quick_push (vec_dest);
4948 if (cvt_type)
4949 vec_dest = vect_create_destination_var (scalar_dest,
4950 modifier == WIDEN
4951 ? vectype_out : cvt_type);
4953 if (!slp_node)
4955 if (modifier == WIDEN)
4957 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4958 if (op_type == binary_op)
4959 vec_oprnds1.create (1);
4961 else if (modifier == NARROW)
4962 vec_oprnds0.create (
4963 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4965 else if (code == WIDEN_LSHIFT_EXPR)
4966 vec_oprnds1.create (slp_node->vec_stmts_size);
4968 last_oprnd = op0;
4969 prev_stmt_info = NULL;
4970 switch (modifier)
4972 case NONE:
4973 for (j = 0; j < ncopies; j++)
4975 if (j == 0)
4976 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
4977 NULL, slp_node);
4978 else
4979 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
4981 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4983 stmt_vec_info new_stmt_info;
4984 /* Arguments are ready, create the new vector stmt. */
4985 if (code1 == CALL_EXPR)
4987 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4988 new_temp = make_ssa_name (vec_dest, new_stmt);
4989 gimple_call_set_lhs (new_stmt, new_temp);
4990 new_stmt_info
4991 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4993 else
4995 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4996 gassign *new_stmt
4997 = gimple_build_assign (vec_dest, code1, vop0);
4998 new_temp = make_ssa_name (vec_dest, new_stmt);
4999 gimple_assign_set_lhs (new_stmt, new_temp);
5000 new_stmt_info
5001 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5004 if (slp_node)
5005 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5006 else
5008 if (!prev_stmt_info)
5009 STMT_VINFO_VEC_STMT (stmt_info)
5010 = *vec_stmt = new_stmt_info;
5011 else
5012 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5013 prev_stmt_info = new_stmt_info;
5017 break;
5019 case WIDEN:
5020 /* In case the vectorization factor (VF) is bigger than the number
5021 of elements that we can fit in a vectype (nunits), we have to
5022 generate more than one vector stmt - i.e. - we need to "unroll"
5023 the vector stmt by a factor VF/nunits. */
5024 for (j = 0; j < ncopies; j++)
5026 /* Handle uses. */
5027 if (j == 0)
5029 if (slp_node)
5031 if (code == WIDEN_LSHIFT_EXPR)
5033 unsigned int k;
5035 vec_oprnd1 = op1;
5036 /* Store vec_oprnd1 for every vector stmt to be created
5037 for SLP_NODE. We check during the analysis that all
5038 the shift arguments are the same. */
5039 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5040 vec_oprnds1.quick_push (vec_oprnd1);
5042 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5043 &vec_oprnds0, NULL, slp_node);
5045 else
5046 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5047 &vec_oprnds1, slp_node);
5049 else
5051 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5052 vec_oprnds0.quick_push (vec_oprnd0);
5053 if (op_type == binary_op)
5055 if (code == WIDEN_LSHIFT_EXPR)
5056 vec_oprnd1 = op1;
5057 else
5058 vec_oprnd1
5059 = vect_get_vec_def_for_operand (op1, stmt_info);
5060 vec_oprnds1.quick_push (vec_oprnd1);
5064 else
5066 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5067 vec_oprnds0.truncate (0);
5068 vec_oprnds0.quick_push (vec_oprnd0);
5069 if (op_type == binary_op)
5071 if (code == WIDEN_LSHIFT_EXPR)
5072 vec_oprnd1 = op1;
5073 else
5074 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5075 vec_oprnd1);
5076 vec_oprnds1.truncate (0);
5077 vec_oprnds1.quick_push (vec_oprnd1);
5081 /* Arguments are ready. Create the new vector stmts. */
5082 for (i = multi_step_cvt; i >= 0; i--)
5084 tree this_dest = vec_dsts[i];
5085 enum tree_code c1 = code1, c2 = code2;
5086 if (i == 0 && codecvt2 != ERROR_MARK)
5088 c1 = codecvt1;
5089 c2 = codecvt2;
5091 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5092 &vec_oprnds1, stmt_info,
5093 this_dest, gsi,
5094 c1, c2, decl1, decl2,
5095 op_type);
5098 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5100 stmt_vec_info new_stmt_info;
5101 if (cvt_type)
5103 if (codecvt1 == CALL_EXPR)
5105 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5106 new_temp = make_ssa_name (vec_dest, new_stmt);
5107 gimple_call_set_lhs (new_stmt, new_temp);
5108 new_stmt_info
5109 = vect_finish_stmt_generation (stmt_info, new_stmt,
5110 gsi);
5112 else
5114 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5115 new_temp = make_ssa_name (vec_dest);
5116 gassign *new_stmt
5117 = gimple_build_assign (new_temp, codecvt1, vop0);
5118 new_stmt_info
5119 = vect_finish_stmt_generation (stmt_info, new_stmt,
5120 gsi);
5123 else
5124 new_stmt_info = vinfo->lookup_def (vop0);
5126 if (slp_node)
5127 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5128 else
5130 if (!prev_stmt_info)
5131 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5132 else
5133 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5134 prev_stmt_info = new_stmt_info;
5139 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5140 break;
5142 case NARROW:
5143 /* In case the vectorization factor (VF) is bigger than the number
5144 of elements that we can fit in a vectype (nunits), we have to
5145 generate more than one vector stmt - i.e. - we need to "unroll"
5146 the vector stmt by a factor VF/nunits. */
5147 for (j = 0; j < ncopies; j++)
5149 /* Handle uses. */
5150 if (slp_node)
5151 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5152 slp_node);
5153 else
5155 vec_oprnds0.truncate (0);
5156 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5157 vect_pow2 (multi_step_cvt) - 1);
5160 /* Arguments are ready. Create the new vector stmts. */
5161 if (cvt_type)
5162 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5164 if (codecvt1 == CALL_EXPR)
5166 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5167 new_temp = make_ssa_name (vec_dest, new_stmt);
5168 gimple_call_set_lhs (new_stmt, new_temp);
5169 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5171 else
5173 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5174 new_temp = make_ssa_name (vec_dest);
5175 gassign *new_stmt
5176 = gimple_build_assign (new_temp, codecvt1, vop0);
5177 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5180 vec_oprnds0[i] = new_temp;
5183 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5184 stmt_info, vec_dsts, gsi,
5185 slp_node, code1,
5186 &prev_stmt_info);
5189 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5190 break;
5193 vec_oprnds0.release ();
5194 vec_oprnds1.release ();
5195 interm_types.release ();
5197 return true;
5201 /* Function vectorizable_assignment.
5203 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5204 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5205 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5206 Return true if STMT_INFO is vectorizable in this way. */
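/* For illustration, a loop like

       for (i = 0; i < n; i++)
         a[i] = (unsigned int) b[i];

   where B is an array of ints is handled here: the conversion does not
   change the number of lanes or the vector size, so each copy becomes
   a plain vector assignment whose rhs is wrapped in a
   VIEW_CONVERT_EXPR to the destination vector type.  */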
5208 static bool
5209 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5210 stmt_vec_info *vec_stmt, slp_tree slp_node,
5211 stmt_vector_for_cost *cost_vec)
5213 tree vec_dest;
5214 tree scalar_dest;
5215 tree op;
5216 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5217 tree new_temp;
5218 enum vect_def_type dt[1] = {vect_unknown_def_type};
5219 int ndts = 1;
5220 int ncopies;
5221 int i, j;
5222 vec<tree> vec_oprnds = vNULL;
5223 tree vop;
5224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5225 vec_info *vinfo = stmt_info->vinfo;
5226 stmt_vec_info prev_stmt_info = NULL;
5227 enum tree_code code;
5228 tree vectype_in;
5230 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5231 return false;
5233 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5234 && ! vec_stmt)
5235 return false;
5237 /* Is vectorizable assignment? */
5238 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5239 if (!stmt)
5240 return false;
5242 scalar_dest = gimple_assign_lhs (stmt);
5243 if (TREE_CODE (scalar_dest) != SSA_NAME)
5244 return false;
5246 code = gimple_assign_rhs_code (stmt);
5247 if (gimple_assign_single_p (stmt)
5248 || code == PAREN_EXPR
5249 || CONVERT_EXPR_CODE_P (code))
5250 op = gimple_assign_rhs1 (stmt);
5251 else
5252 return false;
5254 if (code == VIEW_CONVERT_EXPR)
5255 op = TREE_OPERAND (op, 0);
5257 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5258 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5260 /* Multiple types in SLP are handled by creating the appropriate number of
5261 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5262 case of SLP. */
5263 if (slp_node)
5264 ncopies = 1;
5265 else
5266 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5268 gcc_assert (ncopies >= 1);
5270 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5272 if (dump_enabled_p ())
5273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5274 "use not simple.\n");
5275 return false;
5278 /* We can handle NOP_EXPR conversions that do not change the number
5279 of elements or the vector size. */
5280 if ((CONVERT_EXPR_CODE_P (code)
5281 || code == VIEW_CONVERT_EXPR)
5282 && (!vectype_in
5283 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5284 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5285 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5286 return false;
5288 /* We do not handle bit-precision changes. */
5289 if ((CONVERT_EXPR_CODE_P (code)
5290 || code == VIEW_CONVERT_EXPR)
5291 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5292 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5293 || !type_has_mode_precision_p (TREE_TYPE (op)))
5294 /* But a conversion that does not change the bit-pattern is ok. */
5295 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5296 > TYPE_PRECISION (TREE_TYPE (op)))
5297 && TYPE_UNSIGNED (TREE_TYPE (op)))
5298 /* Conversion between boolean types of different sizes is
5299 a simple assignment in case their vectypes are the same
5300 boolean vectors. */
5301 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5302 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5304 if (dump_enabled_p ())
5305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5306 "type conversion to/from bit-precision "
5307 "unsupported.\n");
5308 return false;
5311 if (!vec_stmt) /* transformation not required. */
5313 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5314 DUMP_VECT_SCOPE ("vectorizable_assignment");
5315 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5316 return true;
5319 /* Transform. */
5320 if (dump_enabled_p ())
5321 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5323 /* Handle def. */
5324 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5326 /* Handle use. */
5327 for (j = 0; j < ncopies; j++)
5329 /* Handle uses. */
5330 if (j == 0)
5331 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5332 else
5333 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5335 /* Arguments are ready.  Create the new vector stmt. */
5336 stmt_vec_info new_stmt_info = NULL;
5337 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5339 if (CONVERT_EXPR_CODE_P (code)
5340 || code == VIEW_CONVERT_EXPR)
5341 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5342 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5343 new_temp = make_ssa_name (vec_dest, new_stmt);
5344 gimple_assign_set_lhs (new_stmt, new_temp);
5345 new_stmt_info
5346 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5347 if (slp_node)
5348 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5351 if (slp_node)
5352 continue;
5354 if (j == 0)
5355 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5356 else
5357 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5359 prev_stmt_info = new_stmt_info;
5362 vec_oprnds.release ();
5363 return true;
5367 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5368 either as shift by a scalar or by a vector. */
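/* E.g. (a sketch) a[i] = b[i] << s with a loop-invariant S can use the
   shift-by-scalar optab, while a[i] = b[i] << c[i] needs the
   shift-by-vector optab; this predicate only asks whether at least one
   of the two forms is implemented for the vector type corresponding to
   SCALAR_TYPE.  */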
5370 bool
5371 vect_supportable_shift (enum tree_code code, tree scalar_type)
5374 machine_mode vec_mode;
5375 optab optab;
5376 int icode;
5377 tree vectype;
5379 vectype = get_vectype_for_scalar_type (scalar_type);
5380 if (!vectype)
5381 return false;
5383 optab = optab_for_tree_code (code, vectype, optab_scalar);
5384 if (!optab
5385 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5387 optab = optab_for_tree_code (code, vectype, optab_vector);
5388 if (!optab
5389 || (optab_handler (optab, TYPE_MODE (vectype))
5390 == CODE_FOR_nothing))
5391 return false;
5394 vec_mode = TYPE_MODE (vectype);
5395 icode = (int) optab_handler (optab, vec_mode);
5396 if (icode == CODE_FOR_nothing)
5397 return false;
5399 return true;
5403 /* Function vectorizable_shift.
5405 Check if STMT_INFO performs a shift operation that can be vectorized.
5406 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5407 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5408 Return true if STMT_INFO is vectorizable in this way. */
5410 bool
5411 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5412 stmt_vec_info *vec_stmt, slp_tree slp_node,
5413 stmt_vector_for_cost *cost_vec)
5415 tree vec_dest;
5416 tree scalar_dest;
5417 tree op0, op1 = NULL;
5418 tree vec_oprnd1 = NULL_TREE;
5419 tree vectype;
5420 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5421 enum tree_code code;
5422 machine_mode vec_mode;
5423 tree new_temp;
5424 optab optab;
5425 int icode;
5426 machine_mode optab_op2_mode;
5427 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5428 int ndts = 2;
5429 stmt_vec_info prev_stmt_info;
5430 poly_uint64 nunits_in;
5431 poly_uint64 nunits_out;
5432 tree vectype_out;
5433 tree op1_vectype;
5434 int ncopies;
5435 int j, i;
5436 vec<tree> vec_oprnds0 = vNULL;
5437 vec<tree> vec_oprnds1 = vNULL;
5438 tree vop0, vop1;
5439 unsigned int k;
5440 bool scalar_shift_arg = true;
5441 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5442 vec_info *vinfo = stmt_info->vinfo;
5444 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5445 return false;
5447 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5448 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5449 && ! vec_stmt)
5450 return false;
5452 /* Is STMT a vectorizable binary/unary operation? */
5453 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5454 if (!stmt)
5455 return false;
5457 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5458 return false;
5460 code = gimple_assign_rhs_code (stmt);
5462 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5463 || code == RROTATE_EXPR))
5464 return false;
5466 scalar_dest = gimple_assign_lhs (stmt);
5467 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5468 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5470 if (dump_enabled_p ())
5471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5472 "bit-precision shifts not supported.\n");
5473 return false;
5476 op0 = gimple_assign_rhs1 (stmt);
5477 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5479 if (dump_enabled_p ())
5480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5481 "use not simple.\n");
5482 return false;
5484 /* If op0 is an external or constant def use a vector type with
5485 the same size as the output vector type. */
5486 if (!vectype)
5487 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5488 if (vec_stmt)
5489 gcc_assert (vectype);
5490 if (!vectype)
5492 if (dump_enabled_p ())
5493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5494 "no vectype for scalar type\n");
5495 return false;
5498 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5499 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5500 if (maybe_ne (nunits_out, nunits_in))
5501 return false;
5503 op1 = gimple_assign_rhs2 (stmt);
5504 stmt_vec_info op1_def_stmt_info;
5505 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5506 &op1_def_stmt_info))
5508 if (dump_enabled_p ())
5509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5510 "use not simple.\n");
5511 return false;
5514 /* Multiple types in SLP are handled by creating the appropriate number of
5515 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5516 case of SLP. */
5517 if (slp_node)
5518 ncopies = 1;
5519 else
5520 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5522 gcc_assert (ncopies >= 1);
5524 /* Determine whether the shift amount is a vector, or scalar. If the
5525 shift/rotate amount is a vector, use the vector/vector shift optabs. */
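/* For illustration (hypothetical loops):

       for (i = 0; i < n; i++)
         a[i] = b[i] << s;      (s loop-invariant: scalar shift amount)

       for (i = 0; i < n; i++)
         a[i] = b[i] << c[i];   (amount varies per lane: vector amount)

   The first form can use the target's vector-by-scalar shift patterns
   (optab_scalar); the second needs the vector/vector form (optab_vector).  */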
5527 if ((dt[1] == vect_internal_def
5528 || dt[1] == vect_induction_def
5529 || dt[1] == vect_nested_cycle)
5530 && !slp_node)
5531 scalar_shift_arg = false;
5532 else if (dt[1] == vect_constant_def
5533 || dt[1] == vect_external_def
5534 || dt[1] == vect_internal_def)
5536 /* In SLP, we need to check whether the shift count is the same;
5537 in loops, if it is a constant or invariant, it is always
5538 a scalar shift. */
5539 if (slp_node)
5541 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5542 stmt_vec_info slpstmt_info;
5544 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5546 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5547 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5548 scalar_shift_arg = false;
5551 /* For internal SLP defs we have to make sure we see scalar stmts
5552 for all vector elements.
5553 ??? For different vectors we could resort to a different
5554 scalar shift operand but code-generation below simply always
5555 takes the first. */
5556 if (dt[1] == vect_internal_def
5557 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5558 stmts.length ()))
5559 scalar_shift_arg = false;
5562 /* If the shift amount is computed by a pattern stmt we cannot
5563 use the scalar amount directly thus give up and use a vector
5564 shift. */
5565 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5566 scalar_shift_arg = false;
5568 else
5570 if (dump_enabled_p ())
5571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5572 "operand mode requires invariant argument.\n");
5573 return false;
5576 /* Vector shifted by vector. */
5577 if (!scalar_shift_arg)
5579 optab = optab_for_tree_code (code, vectype, optab_vector);
5580 if (dump_enabled_p ())
5581 dump_printf_loc (MSG_NOTE, vect_location,
5582 "vector/vector shift/rotate found.\n");
5584 if (!op1_vectype)
5585 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5586 if (op1_vectype == NULL_TREE
5587 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5589 if (dump_enabled_p ())
5590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5591 "unusable type for last operand in"
5592 " vector/vector shift/rotate.\n");
5593 return false;
5596 /* See if the machine has a vector shifted by scalar insn and if not
5597 then see if it has a vector shifted by vector insn. */
5598 else
5600 optab = optab_for_tree_code (code, vectype, optab_scalar);
5601 if (optab
5602 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5604 if (dump_enabled_p ())
5605 dump_printf_loc (MSG_NOTE, vect_location,
5606 "vector/scalar shift/rotate found.\n");
5608 else
5610 optab = optab_for_tree_code (code, vectype, optab_vector);
5611 if (optab
5612 && (optab_handler (optab, TYPE_MODE (vectype))
5613 != CODE_FOR_nothing))
5615 scalar_shift_arg = false;
5617 if (dump_enabled_p ())
5618 dump_printf_loc (MSG_NOTE, vect_location,
5619 "vector/vector shift/rotate found.\n");
5621 /* Unlike the other binary operators, shifts/rotates have
5622 the rhs being int, instead of the same type as the lhs,
5623 so make sure the scalar is the right type if we are
5624 dealing with vectors of long long/long/short/char. */
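/* E.g. for "long long *a; ... a[i] = a[i] << 2;" the count 2 has type
   int, while a vector/vector shift of V2DImode elements wants DImode
   counts, so the constant (or invariant) amount is converted to the
   vector's element type and splatted first.  (The types here are only
   an example.)  */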
5625 if (dt[1] == vect_constant_def)
5626 op1 = fold_convert (TREE_TYPE (vectype), op1);
5627 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5628 TREE_TYPE (op1)))
5630 if (slp_node
5631 && TYPE_MODE (TREE_TYPE (vectype))
5632 != TYPE_MODE (TREE_TYPE (op1)))
5634 if (dump_enabled_p ())
5635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5636 "unusable type for last operand in"
5637 " vector/vector shift/rotate.\n");
5638 return false;
5640 if (vec_stmt && !slp_node)
5642 op1 = fold_convert (TREE_TYPE (vectype), op1);
5643 op1 = vect_init_vector (stmt_info, op1,
5644 TREE_TYPE (vectype), NULL);
5651 /* Supportable by target? */
5652 if (!optab)
5654 if (dump_enabled_p ())
5655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5656 "no optab.\n");
5657 return false;
5659 vec_mode = TYPE_MODE (vectype);
5660 icode = (int) optab_handler (optab, vec_mode);
5661 if (icode == CODE_FOR_nothing)
5663 if (dump_enabled_p ())
5664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5665 "op not supported by target.\n");
5666 /* Check only during analysis. */
5667 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5668 || (!vec_stmt
5669 && !vect_worthwhile_without_simd_p (vinfo, code)))
5670 return false;
5671 if (dump_enabled_p ())
5672 dump_printf_loc (MSG_NOTE, vect_location,
5673 "proceeding using word mode.\n");
5676 /* Worthwhile without SIMD support? Check only during analysis. */
5677 if (!vec_stmt
5678 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5679 && !vect_worthwhile_without_simd_p (vinfo, code))
5681 if (dump_enabled_p ())
5682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5683 "not worthwhile without SIMD support.\n");
5684 return false;
5687 if (!vec_stmt) /* transformation not required. */
5689 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5690 DUMP_VECT_SCOPE ("vectorizable_shift");
5691 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5692 return true;
5695 /* Transform. */
5697 if (dump_enabled_p ())
5698 dump_printf_loc (MSG_NOTE, vect_location,
5699 "transform binary/unary operation.\n");
5701 /* Handle def. */
5702 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5704 prev_stmt_info = NULL;
5705 for (j = 0; j < ncopies; j++)
5707 /* Handle uses. */
5708 if (j == 0)
5710 if (scalar_shift_arg)
5712 /* Vector shl and shr insn patterns can be defined with scalar
5713 operand 2 (shift operand). In this case, use constant or loop
5714 invariant op1 directly, without extending it to vector mode
5715 first. */
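/* For instance, a target may define its vector-shift-by-scalar pattern
   with a genuinely scalar count operand (say SImode) even though
   operands 0 and 1 are vectors; insn_data[icode].operand[2].mode is
   then not a vector mode and the invariant count can be used directly.
   The exact count mode is target-specific; SImode is just an example.  */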
5716 optab_op2_mode = insn_data[icode].operand[2].mode;
5717 if (!VECTOR_MODE_P (optab_op2_mode))
5719 if (dump_enabled_p ())
5720 dump_printf_loc (MSG_NOTE, vect_location,
5721 "operand 1 using scalar mode.\n");
5722 vec_oprnd1 = op1;
5723 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5724 vec_oprnds1.quick_push (vec_oprnd1);
5725 if (slp_node)
5727 /* Store vec_oprnd1 for every vector stmt to be created
5728 for SLP_NODE. We check during the analysis that all
5729 the shift arguments are the same.
5730 TODO: Allow different constants for different vector
5731 stmts generated for an SLP instance. */
5732 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5733 vec_oprnds1.quick_push (vec_oprnd1);
5738 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5739 (a special case for certain kinds of vector shifts); otherwise,
5740 operand 1 should be of a vector type (the usual case). */
5741 if (vec_oprnd1)
5742 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5743 slp_node);
5744 else
5745 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5746 slp_node);
5748 else
5749 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5751 /* Arguments are ready. Create the new vector stmt. */
5752 stmt_vec_info new_stmt_info = NULL;
5753 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5755 vop1 = vec_oprnds1[i];
5756 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5757 new_temp = make_ssa_name (vec_dest, new_stmt);
5758 gimple_assign_set_lhs (new_stmt, new_temp);
5759 new_stmt_info
5760 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5761 if (slp_node)
5762 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5765 if (slp_node)
5766 continue;
5768 if (j == 0)
5769 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5770 else
5771 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5772 prev_stmt_info = new_stmt_info;
5775 vec_oprnds0.release ();
5776 vec_oprnds1.release ();
5778 return true;
5782 /* Function vectorizable_operation.
5784 Check if STMT_INFO performs a binary, unary or ternary operation that can
5785 be vectorized.
5786 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5787 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5788 Return true if STMT_INFO is vectorizable in this way. */
5790 static bool
5791 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5792 stmt_vec_info *vec_stmt, slp_tree slp_node,
5793 stmt_vector_for_cost *cost_vec)
5795 tree vec_dest;
5796 tree scalar_dest;
5797 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5798 tree vectype;
5799 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5800 enum tree_code code, orig_code;
5801 machine_mode vec_mode;
5802 tree new_temp;
5803 int op_type;
5804 optab optab;
5805 bool target_support_p;
5806 enum vect_def_type dt[3]
5807 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5808 int ndts = 3;
5809 stmt_vec_info prev_stmt_info;
5810 poly_uint64 nunits_in;
5811 poly_uint64 nunits_out;
5812 tree vectype_out;
5813 int ncopies;
5814 int j, i;
5815 vec<tree> vec_oprnds0 = vNULL;
5816 vec<tree> vec_oprnds1 = vNULL;
5817 vec<tree> vec_oprnds2 = vNULL;
5818 tree vop0, vop1, vop2;
5819 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5820 vec_info *vinfo = stmt_info->vinfo;
5822 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5823 return false;
5825 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5826 && ! vec_stmt)
5827 return false;
5829 /* Is STMT a vectorizable binary/unary operation? */
5830 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5831 if (!stmt)
5832 return false;
5834 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5835 return false;
5837 orig_code = code = gimple_assign_rhs_code (stmt);
5839 /* For pointer addition and subtraction, we should use the normal
5840 plus and minus for the vector operation. */
5841 if (code == POINTER_PLUS_EXPR)
5842 code = PLUS_EXPR;
5843 if (code == POINTER_DIFF_EXPR)
5844 code = MINUS_EXPR;
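/* E.g. "q = p + i" appears as POINTER_PLUS_EXPR and "d = p - q" as
   POINTER_DIFF_EXPR in GIMPLE; vector elements have no pointer type,
   they are treated as pointer-sized integers, so the plain PLUS_EXPR
   and MINUS_EXPR optabs are what gets queried below.  */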
5846 /* Support only unary or binary operations. */
5847 op_type = TREE_CODE_LENGTH (code);
5848 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5850 if (dump_enabled_p ())
5851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5852 "num. args = %d (not unary/binary/ternary op).\n",
5853 op_type);
5854 return false;
5857 scalar_dest = gimple_assign_lhs (stmt);
5858 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5860 /* Most operations cannot handle bit-precision types without extra
5861 truncations. */
5862 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5863 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5865 /* Exceptions are bitwise binary operations. */
5865 && code != BIT_IOR_EXPR
5866 && code != BIT_XOR_EXPR
5867 && code != BIT_AND_EXPR)
5869 if (dump_enabled_p ())
5870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5871 "bit-precision arithmetic not supported.\n");
5872 return false;
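/* Reasoning by example (a 3-bit field is only an illustration): adding
   two values of a 3-bit type can carry into the padding bits of its
   QImode lane and would need an extra truncation afterwards, whereas
   BIT_AND_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR never produce bits that
   are not already present in the operands, so they stay exact.  */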
5875 op0 = gimple_assign_rhs1 (stmt);
5876 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5878 if (dump_enabled_p ())
5879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5880 "use not simple.\n");
5881 return false;
5883 /* If op0 is an external or constant def use a vector type with
5884 the same size as the output vector type. */
5885 if (!vectype)
5887 /* For boolean type we cannot determine vectype by
5888 invariant value (don't know whether it is a vector
5889 of booleans or vector of integers). We use output
5890 vectype because operations on boolean don't change
5891 type. */
5892 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5894 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5896 if (dump_enabled_p ())
5897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5898 "not supported operation on bool value.\n");
5899 return false;
5901 vectype = vectype_out;
5903 else
5904 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5906 if (vec_stmt)
5907 gcc_assert (vectype);
5908 if (!vectype)
5910 if (dump_enabled_p ())
5911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5912 "no vectype for scalar type %T\n",
5913 TREE_TYPE (op0));
5915 return false;
5918 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5919 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5920 if (maybe_ne (nunits_out, nunits_in))
5921 return false;
5923 if (op_type == binary_op || op_type == ternary_op)
5925 op1 = gimple_assign_rhs2 (stmt);
5926 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
5928 if (dump_enabled_p ())
5929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5930 "use not simple.\n");
5931 return false;
5934 if (op_type == ternary_op)
5936 op2 = gimple_assign_rhs3 (stmt);
5937 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
5939 if (dump_enabled_p ())
5940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5941 "use not simple.\n");
5942 return false;
5946 /* Multiple types in SLP are handled by creating the appropriate number of
5947 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5948 case of SLP. */
5949 if (slp_node)
5950 ncopies = 1;
5951 else
5952 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5954 gcc_assert (ncopies >= 1);
5956 /* Shifts are handled in vectorizable_shift (). */
5957 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5958 || code == RROTATE_EXPR)
5959 return false;
5961 /* Supportable by target? */
5963 vec_mode = TYPE_MODE (vectype);
5964 if (code == MULT_HIGHPART_EXPR)
5965 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5966 else
5968 optab = optab_for_tree_code (code, vectype, optab_default);
5969 if (!optab)
5971 if (dump_enabled_p ())
5972 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5973 "no optab.\n");
5974 return false;
5976 target_support_p = (optab_handler (optab, vec_mode)
5977 != CODE_FOR_nothing);
5980 if (!target_support_p)
5982 if (dump_enabled_p ())
5983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5984 "op not supported by target.\n");
5985 /* Check only during analysis. */
5986 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5987 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5988 return false;
5989 if (dump_enabled_p ())
5990 dump_printf_loc (MSG_NOTE, vect_location,
5991 "proceeding using word mode.\n");
5994 /* Worthwhile without SIMD support? Check only during analysis. */
5995 if (!VECTOR_MODE_P (vec_mode)
5996 && !vec_stmt
5997 && !vect_worthwhile_without_simd_p (vinfo, code))
5999 if (dump_enabled_p ())
6000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6001 "not worthwhile without SIMD support.\n");
6002 return false;
6005 if (!vec_stmt) /* transformation not required. */
6007 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6008 DUMP_VECT_SCOPE ("vectorizable_operation");
6009 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6010 return true;
6013 /* Transform. */
6015 if (dump_enabled_p ())
6016 dump_printf_loc (MSG_NOTE, vect_location,
6017 "transform binary/unary operation.\n");
6019 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6020 vectors with unsigned elements, but the result is signed. So, we
6021 need to compute the MINUS_EXPR into vectype temporary and
6022 VIEW_CONVERT_EXPR it into the final vectype_out result. */
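/* Sketch of the sequence generated for "d = p - q" (vector names and
   element types are illustrative):

       tmp_v = p_v - q_v;                           unsigned MINUS_EXPR
       d_v   = VIEW_CONVERT_EXPR <signed vectype> (tmp_v);

   Only the final reinterpretation differs from the usual binary-op path.  */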
6023 tree vec_cvt_dest = NULL_TREE;
6024 if (orig_code == POINTER_DIFF_EXPR)
6026 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6027 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6029 /* Handle def. */
6030 else
6031 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6033 /* In case the vectorization factor (VF) is bigger than the number
6034 of elements that we can fit in a vectype (nunits), we have to generate
6035 more than one vector stmt - i.e - we need to "unroll" the
6036 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6037 from one copy of the vector stmt to the next, in the field
6038 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6039 stages to find the correct vector defs to be used when vectorizing
6040 stmts that use the defs of the current stmt. The example below
6041 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6042 we need to create 4 vectorized stmts):
6044 before vectorization:
6045 RELATED_STMT VEC_STMT
6046 S1: x = memref - -
6047 S2: z = x + 1 - -
6049 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6050 there):
6051 RELATED_STMT VEC_STMT
6052 VS1_0: vx0 = memref0 VS1_1 -
6053 VS1_1: vx1 = memref1 VS1_2 -
6054 VS1_2: vx2 = memref2 VS1_3 -
6055 VS1_3: vx3 = memref3 - -
6056 S1: x = load - VS1_0
6057 S2: z = x + 1 - -
6059 step2: vectorize stmt S2 (done here):
6060 To vectorize stmt S2 we first need to find the relevant vector
6061 def for the first operand 'x'. This is, as usual, obtained from
6062 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6063 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6064 relevant vector def 'vx0'. Having found 'vx0' we can generate
6065 the vector stmt VS2_0, and as usual, record it in the
6066 STMT_VINFO_VEC_STMT of stmt S2.
6067 When creating the second copy (VS2_1), we obtain the relevant vector
6068 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6069 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6070 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6071 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6072 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6073 chain of stmts and pointers:
6074 RELATED_STMT VEC_STMT
6075 VS1_0: vx0 = memref0 VS1_1 -
6076 VS1_1: vx1 = memref1 VS1_2 -
6077 VS1_2: vx2 = memref2 VS1_3 -
6078 VS1_3: vx3 = memref3 - -
6079 S1: x = load - VS1_0
6080 VS2_0: vz0 = vx0 + v1 VS2_1 -
6081 VS2_1: vz1 = vx1 + v1 VS2_2 -
6082 VS2_2: vz2 = vx2 + v1 VS2_3 -
6083 VS2_3: vz3 = vx3 + v1 - -
6084 S2: z = x + 1 - VS2_0 */
6086 prev_stmt_info = NULL;
6087 for (j = 0; j < ncopies; j++)
6089 /* Handle uses. */
6090 if (j == 0)
6092 if (op_type == binary_op)
6093 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6094 slp_node);
6095 else if (op_type == ternary_op)
6097 if (slp_node)
6099 auto_vec<tree> ops(3);
6100 ops.quick_push (op0);
6101 ops.quick_push (op1);
6102 ops.quick_push (op2);
6103 auto_vec<vec<tree> > vec_defs(3);
6104 vect_get_slp_defs (ops, slp_node, &vec_defs);
6105 vec_oprnds0 = vec_defs[0];
6106 vec_oprnds1 = vec_defs[1];
6107 vec_oprnds2 = vec_defs[2];
6109 else
6111 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6112 &vec_oprnds1, NULL);
6113 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6114 NULL, NULL);
6117 else
6118 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6119 slp_node);
6121 else
6123 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6124 if (op_type == ternary_op)
6126 tree vec_oprnd = vec_oprnds2.pop ();
6127 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6128 vec_oprnd));
6132 /* Arguments are ready. Create the new vector stmt. */
6133 stmt_vec_info new_stmt_info = NULL;
6134 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6136 vop1 = ((op_type == binary_op || op_type == ternary_op)
6137 ? vec_oprnds1[i] : NULL_TREE);
6138 vop2 = ((op_type == ternary_op)
6139 ? vec_oprnds2[i] : NULL_TREE);
6140 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6141 vop0, vop1, vop2);
6142 new_temp = make_ssa_name (vec_dest, new_stmt);
6143 gimple_assign_set_lhs (new_stmt, new_temp);
6144 new_stmt_info
6145 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6146 if (vec_cvt_dest)
6148 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6149 gassign *new_stmt
6150 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6151 new_temp);
6152 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6153 gimple_assign_set_lhs (new_stmt, new_temp);
6154 new_stmt_info
6155 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6157 if (slp_node)
6158 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6161 if (slp_node)
6162 continue;
6164 if (j == 0)
6165 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6166 else
6167 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6168 prev_stmt_info = new_stmt_info;
6171 vec_oprnds0.release ();
6172 vec_oprnds1.release ();
6173 vec_oprnds2.release ();
6175 return true;
6178 /* A helper function to ensure data reference DR_INFO's base alignment. */
6180 static void
6181 ensure_base_align (dr_vec_info *dr_info)
6183 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6184 return;
6186 if (dr_info->base_misaligned)
6188 tree base_decl = dr_info->base_decl;
6190 // We should only be able to increase the alignment of a base object if
6191 // we know what its new alignment should be at compile time.
6192 unsigned HOST_WIDE_INT align_base_to =
6193 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6195 if (decl_in_symtab_p (base_decl))
6196 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6197 else
6199 SET_DECL_ALIGN (base_decl, align_base_to);
6200 DECL_USER_ALIGN (base_decl) = 1;
6202 dr_info->base_misaligned = false;
6207 /* Function get_group_alias_ptr_type.
6209 Return the alias type for the group starting at FIRST_STMT_INFO. */
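/* If, say, one member of the group is written through an "int" lvalue
   and another through a differently-typed union member (a purely
   illustrative scenario), the alias sets differ and ptr_type_node is
   returned below; its alias set conflicts with everything, which is
   the conservatively safe answer for the whole group.  */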
6211 static tree
6212 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6214 struct data_reference *first_dr, *next_dr;
6216 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6217 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6218 while (next_stmt_info)
6220 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6221 if (get_alias_set (DR_REF (first_dr))
6222 != get_alias_set (DR_REF (next_dr)))
6224 if (dump_enabled_p ())
6225 dump_printf_loc (MSG_NOTE, vect_location,
6226 "conflicting alias set types.\n");
6227 return ptr_type_node;
6229 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6231 return reference_alias_ptr_type (DR_REF (first_dr));
6235 /* Function vectorizable_store.
6237 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
6238 that can be vectorized.
6239 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6240 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6241 Return true if STMT_INFO is vectorizable in this way. */
6243 static bool
6244 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6245 stmt_vec_info *vec_stmt, slp_tree slp_node,
6246 stmt_vector_for_cost *cost_vec)
6248 tree data_ref;
6249 tree op;
6250 tree vec_oprnd = NULL_TREE;
6251 tree elem_type;
6252 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6253 struct loop *loop = NULL;
6254 machine_mode vec_mode;
6255 tree dummy;
6256 enum dr_alignment_support alignment_support_scheme;
6257 enum vect_def_type rhs_dt = vect_unknown_def_type;
6258 enum vect_def_type mask_dt = vect_unknown_def_type;
6259 stmt_vec_info prev_stmt_info = NULL;
6260 tree dataref_ptr = NULL_TREE;
6261 tree dataref_offset = NULL_TREE;
6262 gimple *ptr_incr = NULL;
6263 int ncopies;
6264 int j;
6265 stmt_vec_info first_stmt_info;
6266 bool grouped_store;
6267 unsigned int group_size, i;
6268 vec<tree> oprnds = vNULL;
6269 vec<tree> result_chain = vNULL;
6270 tree offset = NULL_TREE;
6271 vec<tree> vec_oprnds = vNULL;
6272 bool slp = (slp_node != NULL);
6273 unsigned int vec_num;
6274 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6275 vec_info *vinfo = stmt_info->vinfo;
6276 tree aggr_type;
6277 gather_scatter_info gs_info;
6278 poly_uint64 vf;
6279 vec_load_store_type vls_type;
6280 tree ref_type;
6282 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6283 return false;
6285 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6286 && ! vec_stmt)
6287 return false;
6289 /* Is vectorizable store? */
6291 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6292 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6294 tree scalar_dest = gimple_assign_lhs (assign);
6295 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6296 && is_pattern_stmt_p (stmt_info))
6297 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6298 if (TREE_CODE (scalar_dest) != ARRAY_REF
6299 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6300 && TREE_CODE (scalar_dest) != INDIRECT_REF
6301 && TREE_CODE (scalar_dest) != COMPONENT_REF
6302 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6303 && TREE_CODE (scalar_dest) != REALPART_EXPR
6304 && TREE_CODE (scalar_dest) != MEM_REF)
6305 return false;
6307 else
6309 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
6310 if (!call || !gimple_call_internal_p (call))
6311 return false;
6313 internal_fn ifn = gimple_call_internal_fn (call);
6314 if (!internal_store_fn_p (ifn))
6315 return false;
6317 if (slp_node != NULL)
6319 if (dump_enabled_p ())
6320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6321 "SLP of masked stores not supported.\n");
6322 return false;
6325 int mask_index = internal_fn_mask_index (ifn);
6326 if (mask_index >= 0)
6328 mask = gimple_call_arg (call, mask_index);
6329 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
6330 &mask_vectype))
6331 return false;
6335 op = vect_get_store_rhs (stmt_info);
6337 /* Cannot have hybrid store SLP -- that would mean storing to the
6338 same location twice. */
6339 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6341 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6342 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6344 if (loop_vinfo)
6346 loop = LOOP_VINFO_LOOP (loop_vinfo);
6347 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6349 else
6350 vf = 1;
6352 /* Multiple types in SLP are handled by creating the appropriate number of
6353 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6354 case of SLP. */
6355 if (slp)
6356 ncopies = 1;
6357 else
6358 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6360 gcc_assert (ncopies >= 1);
6362 /* FORNOW. This restriction should be relaxed. */
6363 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
6365 if (dump_enabled_p ())
6366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6367 "multiple types in nested loop.\n");
6368 return false;
6371 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
6372 return false;
6374 elem_type = TREE_TYPE (vectype);
6375 vec_mode = TYPE_MODE (vectype);
6377 if (!STMT_VINFO_DATA_REF (stmt_info))
6378 return false;
6380 vect_memory_access_type memory_access_type;
6381 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
6382 &memory_access_type, &gs_info))
6383 return false;
6385 if (mask)
6387 if (memory_access_type == VMAT_CONTIGUOUS)
6389 if (!VECTOR_MODE_P (vec_mode)
6390 || !can_vec_mask_load_store_p (vec_mode,
6391 TYPE_MODE (mask_vectype), false))
6392 return false;
6394 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6395 && (memory_access_type != VMAT_GATHER_SCATTER
6396 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
6398 if (dump_enabled_p ())
6399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6400 "unsupported access type for masked store.\n");
6401 return false;
6404 else
6406 /* FORNOW. In some cases we can vectorize even if the data type is
6407 not supported (e.g. array initialization with 0). */
6408 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6409 return false;
6412 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
6413 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6414 && memory_access_type != VMAT_GATHER_SCATTER
6415 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6416 if (grouped_store)
6418 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6419 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6420 group_size = DR_GROUP_SIZE (first_stmt_info);
6422 else
6424 first_stmt_info = stmt_info;
6425 first_dr_info = dr_info;
6426 group_size = vec_num = 1;
6429 if (!vec_stmt) /* transformation not required. */
6431 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6433 if (loop_vinfo
6434 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6435 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6436 memory_access_type, &gs_info);
6438 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6439 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6440 vls_type, slp_node, cost_vec);
6441 return true;
6443 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6445 /* Transform. */
6447 ensure_base_align (dr_info);
6449 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6451 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6452 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6453 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6454 tree ptr, var, scale, vec_mask;
6455 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
6456 tree mask_halfvectype = mask_vectype;
6457 edge pe = loop_preheader_edge (loop);
6458 gimple_seq seq;
6459 basic_block new_bb;
6460 enum { NARROW, NONE, WIDEN } modifier;
6461 poly_uint64 scatter_off_nunits
6462 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6464 if (known_eq (nunits, scatter_off_nunits))
6465 modifier = NONE;
6466 else if (known_eq (nunits * 2, scatter_off_nunits))
6468 modifier = WIDEN;
6470 /* Currently gathers and scatters are only supported for
6471 fixed-length vectors. */
6472 unsigned int count = scatter_off_nunits.to_constant ();
6473 vec_perm_builder sel (count, count, 1);
6474 for (i = 0; i < (unsigned int) count; ++i)
6475 sel.quick_push (i | (count / 2));
6477 vec_perm_indices indices (sel, 1, count);
6478 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6479 indices);
6480 gcc_assert (perm_mask != NULL_TREE);
6482 else if (known_eq (nunits, scatter_off_nunits * 2))
6484 modifier = NARROW;
6486 /* Currently gathers and scatters are only supported for
6487 fixed-length vectors. */
6488 unsigned int count = nunits.to_constant ();
6489 vec_perm_builder sel (count, count, 1);
6490 for (i = 0; i < (unsigned int) count; ++i)
6491 sel.quick_push (i | (count / 2));
6493 vec_perm_indices indices (sel, 2, count);
6494 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6495 gcc_assert (perm_mask != NULL_TREE);
6496 ncopies *= 2;
6498 if (mask)
6499 mask_halfvectype
6500 = build_same_sized_truth_vector_type (gs_info.offset_vectype);
6502 else
6503 gcc_unreachable ();
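/* Worked example for the selector arithmetic above: with count == 4
   the pushed indices are {0|2, 1|2, 2|2, 3|2} = {2, 3, 2, 3}, i.e. the
   permutation moves the upper half of the permuted vector into the low
   lanes (repeated to fill the result), which is what the odd-numbered
   (j & 1) copies consume.  */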
6505 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6506 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6507 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6508 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6509 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6510 scaletype = TREE_VALUE (arglist);
6512 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6513 && TREE_CODE (rettype) == VOID_TYPE);
6515 ptr = fold_convert (ptrtype, gs_info.base);
6516 if (!is_gimple_min_invariant (ptr))
6518 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6519 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6520 gcc_assert (!new_bb);
6523 if (mask == NULL_TREE)
6525 mask_arg = build_int_cst (masktype, -1);
6526 mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
6529 scale = build_int_cst (scaletype, gs_info.scale);
6531 prev_stmt_info = NULL;
6532 for (j = 0; j < ncopies; ++j)
6534 if (j == 0)
6536 src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
6537 op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
6538 stmt_info);
6539 if (mask)
6540 mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
6541 stmt_info);
6543 else if (modifier != NONE && (j & 1))
6545 if (modifier == WIDEN)
6548 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
6549 vec_oprnd1);
6550 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6551 stmt_info, gsi);
6552 if (mask)
6553 mask_op
6554 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
6555 vec_mask);
6557 else if (modifier == NARROW)
6559 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6560 stmt_info, gsi);
6561 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
6562 vec_oprnd0);
6564 else
6565 gcc_unreachable ();
6567 else
6569 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
6570 vec_oprnd1);
6571 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
6572 vec_oprnd0);
6573 if (mask)
6574 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
6575 vec_mask);
6578 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6580 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6581 TYPE_VECTOR_SUBPARTS (srctype)));
6582 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6583 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6584 gassign *new_stmt
6585 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6586 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6587 src = var;
6590 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6592 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6593 TYPE_VECTOR_SUBPARTS (idxtype)));
6594 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6595 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6596 gassign *new_stmt
6597 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6598 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6599 op = var;
6602 if (mask)
6604 tree utype;
6605 mask_arg = mask_op;
6606 if (modifier == NARROW)
6608 var = vect_get_new_ssa_name (mask_halfvectype,
6609 vect_simple_var);
6610 gassign *new_stmt
6611 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
6612 : VEC_UNPACK_LO_EXPR,
6613 mask_op);
6614 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6615 mask_arg = var;
6617 tree optype = TREE_TYPE (mask_arg);
6618 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
6619 utype = masktype;
6620 else
6621 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
6622 var = vect_get_new_ssa_name (utype, vect_scalar_var);
6623 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
6624 gassign *new_stmt
6625 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
6626 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6627 mask_arg = var;
6628 if (!useless_type_conversion_p (masktype, utype))
6630 gcc_assert (TYPE_PRECISION (utype)
6631 <= TYPE_PRECISION (masktype));
6632 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
6633 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
6634 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6635 mask_arg = var;
6639 gcall *new_stmt
6640 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
6641 stmt_vec_info new_stmt_info
6642 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6644 if (prev_stmt_info == NULL)
6645 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6646 else
6647 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6648 prev_stmt_info = new_stmt_info;
6650 return true;
6653 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6654 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
6656 if (grouped_store)
6658 /* FORNOW */
6659 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
6661 /* We vectorize all the stmts of the interleaving group when we
6662 reach the last stmt in the group. */
6663 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6664 < DR_GROUP_SIZE (first_stmt_info)
6665 && !slp)
6667 *vec_stmt = NULL;
6668 return true;
6671 if (slp)
6673 grouped_store = false;
6674 /* VEC_NUM is the number of vect stmts to be created for this
6675 group. */
6676 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6677 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6678 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6679 == first_stmt_info);
6680 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6681 op = vect_get_store_rhs (first_stmt_info);
6683 else
6684 /* VEC_NUM is the number of vect stmts to be created for this
6685 group. */
6686 vec_num = group_size;
6688 ref_type = get_group_alias_ptr_type (first_stmt_info);
6690 else
6691 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
6693 if (dump_enabled_p ())
6694 dump_printf_loc (MSG_NOTE, vect_location,
6695 "transform store. ncopies = %d\n", ncopies);
6697 if (memory_access_type == VMAT_ELEMENTWISE
6698 || memory_access_type == VMAT_STRIDED_SLP)
6700 gimple_stmt_iterator incr_gsi;
6701 bool insert_after;
6702 gimple *incr;
6703 tree offvar;
6704 tree ivstep;
6705 tree running_off;
6706 tree stride_base, stride_step, alias_off;
6707 tree vec_oprnd;
6708 unsigned int g;
6709 /* Checked by get_load_store_type. */
6710 unsigned int const_nunits = nunits.to_constant ();
6712 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6713 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
6715 stride_base
6716 = fold_build_pointer_plus
6717 (DR_BASE_ADDRESS (first_dr_info->dr),
6718 size_binop (PLUS_EXPR,
6719 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
6720 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
6721 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
6723 /* For a store with loop-invariant (but other than power-of-2)
6724 stride (i.e. not a grouped access) like so:
6726 for (i = 0; i < n; i += stride)
6727 array[i] = ...;
6729 we generate a new induction variable and new stores from
6730 the components of the (vectorized) rhs:
6732 for (j = 0; ; j += VF*stride)
6733 vectemp = ...;
6734 tmp1 = vectemp[0];
6735 array[j] = tmp1;
6736 tmp2 = vectemp[1];
6737 array[j + stride] = tmp2;
6741 unsigned nstores = const_nunits;
6742 unsigned lnel = 1;
6743 tree ltype = elem_type;
6744 tree lvectype = vectype;
6745 if (slp)
6747 if (group_size < const_nunits
6748 && const_nunits % group_size == 0)
6750 nstores = const_nunits / group_size;
6751 lnel = group_size;
6752 ltype = build_vector_type (elem_type, group_size);
6753 lvectype = vectype;
6755 /* First check if vec_extract optab doesn't support extraction
6756 of vector elts directly. */
6757 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6758 machine_mode vmode;
6759 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6760 || !VECTOR_MODE_P (vmode)
6761 || !targetm.vector_mode_supported_p (vmode)
6762 || (convert_optab_handler (vec_extract_optab,
6763 TYPE_MODE (vectype), vmode)
6764 == CODE_FOR_nothing))
6766 /* Try to avoid emitting an extract of vector elements
6767 by performing the extracts using an integer type of the
6768 same size, extracting from a vector of those and then
6769 re-interpreting it as the original vector type if
6770 supported. */
6771 unsigned lsize
6772 = group_size * GET_MODE_BITSIZE (elmode);
6773 unsigned int lnunits = const_nunits / group_size;
6774 /* If we can't construct such a vector fall back to
6775 element extracts from the original vector type and
6776 element size stores. */
6777 if (int_mode_for_size (lsize, 0).exists (&elmode)
6778 && mode_for_vector (elmode, lnunits).exists (&vmode)
6779 && VECTOR_MODE_P (vmode)
6780 && targetm.vector_mode_supported_p (vmode)
6781 && (convert_optab_handler (vec_extract_optab,
6782 vmode, elmode)
6783 != CODE_FOR_nothing))
6785 nstores = lnunits;
6786 lnel = group_size;
6787 ltype = build_nonstandard_integer_type (lsize, 1);
6788 lvectype = build_vector_type (ltype, nstores);
6790 /* Else fall back to vector extraction anyway.
6791 Fewer stores are more important than avoiding spilling
6792 of the vector we extract from. Compared to the
6793 construction case in vectorizable_load, no store-forwarding
6794 issue exists here for reasonable archs. */
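/* Worked example (sizes are illustrative): for a 16-char vector with
   group_size == 2, lsize = 2 * 8 = 16 and lnunits = 16 / 2 = 8, so
   instead of sixteen byte extracts the vector is viewed as eight
   16-bit lanes; each extracted lane covers one whole group and is
   written back with a single 16-bit store.  */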
6797 else if (group_size >= const_nunits
6798 && group_size % const_nunits == 0)
6800 nstores = 1;
6801 lnel = const_nunits;
6802 ltype = vectype;
6803 lvectype = vectype;
6805 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6806 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6809 ivstep = stride_step;
6810 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6811 build_int_cst (TREE_TYPE (ivstep), vf));
6813 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6815 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6816 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6817 create_iv (stride_base, ivstep, NULL,
6818 loop, &incr_gsi, insert_after,
6819 &offvar, NULL);
6820 incr = gsi_stmt (incr_gsi);
6821 loop_vinfo->add_stmt (incr);
6823 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6825 prev_stmt_info = NULL;
6826 alias_off = build_int_cst (ref_type, 0);
6827 stmt_vec_info next_stmt_info = first_stmt_info;
6828 for (g = 0; g < group_size; g++)
6830 running_off = offvar;
6831 if (g)
6833 tree size = TYPE_SIZE_UNIT (ltype);
6834 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6835 size);
6836 tree newoff = copy_ssa_name (running_off, NULL);
6837 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6838 running_off, pos);
6839 vect_finish_stmt_generation (stmt_info, incr, gsi);
6840 running_off = newoff;
6842 unsigned int group_el = 0;
6843 unsigned HOST_WIDE_INT
6844 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6845 for (j = 0; j < ncopies; j++)
6847 /* We've set op and dt above, from vect_get_store_rhs,
6848 and first_stmt_info == stmt_info. */
6849 if (j == 0)
6851 if (slp)
6853 vect_get_vec_defs (op, NULL_TREE, stmt_info,
6854 &vec_oprnds, NULL, slp_node);
6855 vec_oprnd = vec_oprnds[0];
6857 else
6859 op = vect_get_store_rhs (next_stmt_info);
6860 vec_oprnd = vect_get_vec_def_for_operand
6861 (op, next_stmt_info);
6864 else
6866 if (slp)
6867 vec_oprnd = vec_oprnds[j];
6868 else
6869 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
6870 vec_oprnd);
6872 /* Pun the vector to extract from if necessary. */
6873 if (lvectype != vectype)
6875 tree tem = make_ssa_name (lvectype);
6876 gimple *pun
6877 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6878 lvectype, vec_oprnd));
6879 vect_finish_stmt_generation (stmt_info, pun, gsi);
6880 vec_oprnd = tem;
6882 for (i = 0; i < nstores; i++)
6884 tree newref, newoff;
6885 gimple *incr, *assign;
6886 tree size = TYPE_SIZE (ltype);
6887 /* Extract the i'th component. */
6888 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6889 bitsize_int (i), size);
6890 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6891 size, pos);
6893 elem = force_gimple_operand_gsi (gsi, elem, true,
6894 NULL_TREE, true,
6895 GSI_SAME_STMT);
6897 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6898 group_el * elsz);
6899 newref = build2 (MEM_REF, ltype,
6900 running_off, this_off);
6901 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
6903 /* And store it to *running_off. */
6904 assign = gimple_build_assign (newref, elem);
6905 stmt_vec_info assign_info
6906 = vect_finish_stmt_generation (stmt_info, assign, gsi);
6908 group_el += lnel;
6909 if (! slp
6910 || group_el == group_size)
6912 newoff = copy_ssa_name (running_off, NULL);
6913 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6914 running_off, stride_step);
6915 vect_finish_stmt_generation (stmt_info, incr, gsi);
6917 running_off = newoff;
6918 group_el = 0;
6920 if (g == group_size - 1
6921 && !slp)
6923 if (j == 0 && i == 0)
6924 STMT_VINFO_VEC_STMT (stmt_info)
6925 = *vec_stmt = assign_info;
6926 else
6927 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6928 prev_stmt_info = assign_info;
6932 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6933 if (slp)
6934 break;
6937 vec_oprnds.release ();
6938 return true;
6941 auto_vec<tree> dr_chain (group_size);
6942 oprnds.create (group_size);
6944 alignment_support_scheme
6945 = vect_supportable_dr_alignment (first_dr_info, false);
6946 gcc_assert (alignment_support_scheme);
6947 vec_loop_masks *loop_masks
6948 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6949 ? &LOOP_VINFO_MASKS (loop_vinfo)
6950 : NULL);
6951 /* Targets with store-lane instructions must not require explicit
6952 realignment. vect_supportable_dr_alignment always returns either
6953 dr_aligned or dr_unaligned_supported for masked operations. */
6954 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6955 && !mask
6956 && !loop_masks)
6957 || alignment_support_scheme == dr_aligned
6958 || alignment_support_scheme == dr_unaligned_supported);
6960 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6961 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6962 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6964 tree bump;
6965 tree vec_offset = NULL_TREE;
6966 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6968 aggr_type = NULL_TREE;
6969 bump = NULL_TREE;
6971 else if (memory_access_type == VMAT_GATHER_SCATTER)
6973 aggr_type = elem_type;
6974 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
6975 &bump, &vec_offset);
6977 else
6979 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6980 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6981 else
6982 aggr_type = vectype;
6983 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
6984 memory_access_type);
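/* E.g. for a VMAT_LOAD_STORE_LANES store of a group of three V4SI
   vectors, AGGR_TYPE is an int[12] array; targets with store-lanes
   instructions (AArch64 st3, for instance) then write the whole
   interleaved block in one go.  The concrete instruction is of course
   target-dependent.  */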
6987 if (mask)
6988 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6990 /* In case the vectorization factor (VF) is bigger than the number
6991 of elements that we can fit in a vectype (nunits), we have to generate
6992 more than one vector stmt - i.e - we need to "unroll" the
6993 vector stmt by a factor VF/nunits. For more details see documentation in
6994 vect_get_vec_def_for_copy_stmt. */
6996 /* In case of interleaving (non-unit grouped access):
6998 S1: &base + 2 = x2
6999 S2: &base = x0
7000 S3: &base + 1 = x1
7001 S4: &base + 3 = x3
7003 We create vectorized stores starting from base address (the access of the
7004 first stmt in the chain (S2 in the above example), when the last store stmt
7005 of the chain (S4) is reached:
7007 VS1: &base = vx2
7008 VS2: &base + vec_size*1 = vx0
7009 VS3: &base + vec_size*2 = vx1
7010 VS4: &base + vec_size*3 = vx3
7012 Then permutation statements are generated:
7014 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7015 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7018 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7019 (the order of the data-refs in the output of vect_permute_store_chain
7020 corresponds to the order of scalar stmts in the interleaving chain - see
7021 the documentation of vect_permute_store_chain()).
7023 In case of both multiple types and interleaving, above vector stores and
7024 permutation stmts are created for every copy. The result vector stmts are
7025 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7026 STMT_VINFO_RELATED_STMT for the next copies.
7029 prev_stmt_info = NULL;
7030 tree vec_mask = NULL_TREE;
7031 for (j = 0; j < ncopies; j++)
7033 stmt_vec_info new_stmt_info;
7034 if (j == 0)
7036 if (slp)
7038 /* Get vectorized arguments for SLP_NODE. */
7039 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
7040 NULL, slp_node);
7042 vec_oprnd = vec_oprnds[0];
7044 else
7046 /* For interleaved stores we collect vectorized defs for all the
7047 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7048 used as an input to vect_permute_store_chain(), and OPRNDS as
7049 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
7051 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7052 OPRNDS are of size 1. */
7053 stmt_vec_info next_stmt_info = first_stmt_info;
7054 for (i = 0; i < group_size; i++)
7056 /* Since gaps are not supported for interleaved stores,
7057 DR_GROUP_SIZE is the exact number of stmts in the chain.
7058 Therefore, NEXT_STMT_INFO can't be NULL. In case
7059 there is no interleaving, DR_GROUP_SIZE is 1,
7060 and only one iteration of the loop will be executed. */
7061 op = vect_get_store_rhs (next_stmt_info);
7062 vec_oprnd = vect_get_vec_def_for_operand
7063 (op, next_stmt_info);
7064 dr_chain.quick_push (vec_oprnd);
7065 oprnds.quick_push (vec_oprnd);
7066 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7068 if (mask)
7069 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
7070 mask_vectype);
7073 /* We should have caught mismatched types earlier. */
7074 gcc_assert (useless_type_conversion_p (vectype,
7075 TREE_TYPE (vec_oprnd)));
7076 bool simd_lane_access_p
7077 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7078 if (simd_lane_access_p
7079 && !loop_masks
7080 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7081 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7082 && integer_zerop (DR_OFFSET (first_dr_info->dr))
7083 && integer_zerop (DR_INIT (first_dr_info->dr))
7084 && alias_sets_conflict_p (get_alias_set (aggr_type),
7085 get_alias_set (TREE_TYPE (ref_type))))
7087 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7088 dataref_offset = build_int_cst (ref_type, 0);
7090 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7091 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
7092 &dataref_ptr, &vec_offset);
7093 else
7094 dataref_ptr
7095 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
7096 simd_lane_access_p ? loop : NULL,
7097 offset, &dummy, gsi, &ptr_incr,
7098 simd_lane_access_p, NULL_TREE, bump);
7100 else
7102 /* For interleaved stores we created vectorized defs for all the
7103 defs stored in OPRNDS in the previous iteration (previous copy).
7104 DR_CHAIN is then used as an input to vect_permute_store_chain(),
7105 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7106 next copy.
7107 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7108 OPRNDS are of size 1. */
7109 for (i = 0; i < group_size; i++)
7111 op = oprnds[i];
7112 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
7113 dr_chain[i] = vec_oprnd;
7114 oprnds[i] = vec_oprnd;
7116 if (mask)
7117 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
7118 if (dataref_offset)
7119 dataref_offset
7120 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7121 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7122 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
7123 else
7124 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7125 stmt_info, bump);
7128 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7130 tree vec_array;
7132 /* Get an array into which we can store the individual vectors. */
7133 vec_array = create_vector_array (vectype, vec_num);
7135 /* Invalidate the current contents of VEC_ARRAY. This should
7136 become an RTL clobber too, which prevents the vector registers
7137 from being upward-exposed. */
7138 vect_clobber_variable (stmt_info, gsi, vec_array);
7140 /* Store the individual vectors into the array. */
7141 for (i = 0; i < vec_num; i++)
7143 vec_oprnd = dr_chain[i];
7144 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
7147 tree final_mask = NULL;
7148 if (loop_masks)
7149 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7150 vectype, j);
7151 if (vec_mask)
7152 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7153 vec_mask, gsi);
7155 gcall *call;
7156 if (final_mask)
7158 /* Emit:
7159 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7160 VEC_ARRAY). */
7161 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7162 tree alias_ptr = build_int_cst (ref_type, align);
7163 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7164 dataref_ptr, alias_ptr,
7165 final_mask, vec_array);
7167 else
7169 /* Emit:
7170 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7171 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7172 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7173 vec_array);
7174 gimple_call_set_lhs (call, data_ref);
7176 gimple_call_set_nothrow (call, true);
7177 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
7179 /* Record that VEC_ARRAY is now dead. */
7180 vect_clobber_variable (stmt_info, gsi, vec_array);
7182 else
7184 new_stmt_info = NULL;
7185 if (grouped_store)
7187 if (j == 0)
7188 result_chain.create (group_size);
7189 /* Permute. */
7190 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
7191 &result_chain);
7194 stmt_vec_info next_stmt_info = first_stmt_info;
7195 for (i = 0; i < vec_num; i++)
7197 unsigned misalign;
7198 unsigned HOST_WIDE_INT align;
7200 tree final_mask = NULL_TREE;
7201 if (loop_masks)
7202 final_mask = vect_get_loop_mask (gsi, loop_masks,
7203 vec_num * ncopies,
7204 vectype, vec_num * j + i);
7205 if (vec_mask)
7206 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7207 vec_mask, gsi);
7209 if (memory_access_type == VMAT_GATHER_SCATTER)
7211 tree scale = size_int (gs_info.scale);
7212 gcall *call;
7213 if (loop_masks)
7214 call = gimple_build_call_internal
7215 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7216 scale, vec_oprnd, final_mask);
7217 else
7218 call = gimple_build_call_internal
7219 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7220 scale, vec_oprnd);
7221 gimple_call_set_nothrow (call, true);
7222 new_stmt_info
7223 = vect_finish_stmt_generation (stmt_info, call, gsi);
7224 break;
7227 if (i > 0)
7228 /* Bump the vector pointer. */
7229 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7230 stmt_info, bump);
7232 if (slp)
7233 vec_oprnd = vec_oprnds[i];
7234 else if (grouped_store)
7235 /* For grouped stores vectorized defs are interleaved in
7236 vect_permute_store_chain(). */
7237 vec_oprnd = result_chain[i];
7239 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
7240 if (aligned_access_p (first_dr_info))
7241 misalign = 0;
7242 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7244 align = dr_alignment (vect_dr_behavior (first_dr_info));
7245 misalign = 0;
7247 else
7248 misalign = DR_MISALIGNMENT (first_dr_info);
7249 if (dataref_offset == NULL_TREE
7250 && TREE_CODE (dataref_ptr) == SSA_NAME)
7251 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7252 misalign);
7254 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7256 tree perm_mask = perm_mask_for_reverse (vectype);
7257 tree perm_dest = vect_create_destination_var
7258 (vect_get_store_rhs (stmt_info), vectype);
7259 tree new_temp = make_ssa_name (perm_dest);
7261 /* Generate the permute statement. */
7262 gimple *perm_stmt
7263 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7264 vec_oprnd, perm_mask);
7265 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7267 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7268 vec_oprnd = new_temp;
7271 /* Arguments are ready. Create the new vector stmt. */
7272 if (final_mask)
7274 align = least_bit_hwi (misalign | align);
7275 tree ptr = build_int_cst (ref_type, align);
7276 gcall *call
7277 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7278 dataref_ptr, ptr,
7279 final_mask, vec_oprnd);
7280 gimple_call_set_nothrow (call, true);
7281 new_stmt_info
7282 = vect_finish_stmt_generation (stmt_info, call, gsi);
7284 else
7286 data_ref = fold_build2 (MEM_REF, vectype,
7287 dataref_ptr,
7288 dataref_offset
7289 ? dataref_offset
7290 : build_int_cst (ref_type, 0));
7291 if (aligned_access_p (first_dr_info))
7293 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7294 TREE_TYPE (data_ref)
7295 = build_aligned_type (TREE_TYPE (data_ref),
7296 align * BITS_PER_UNIT);
7297 else
7298 TREE_TYPE (data_ref)
7299 = build_aligned_type (TREE_TYPE (data_ref),
7300 TYPE_ALIGN (elem_type));
7301 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7302 gassign *new_stmt
7303 = gimple_build_assign (data_ref, vec_oprnd);
7304 new_stmt_info
7305 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7308 if (slp)
7309 continue;
7311 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7312 if (!next_stmt_info)
7313 break;
7316 if (!slp)
7318 if (j == 0)
7319 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7320 else
7321 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7322 prev_stmt_info = new_stmt_info;
7326 oprnds.release ();
7327 result_chain.release ();
7328 vec_oprnds.release ();
7330 return true;
7333 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7334 VECTOR_CST mask. No checks are made that the target platform supports the
7335 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7336 vect_gen_perm_mask_checked. */
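/* An illustrative sketch, not taken from the function below: for a
   four-element VECTYPE and SEL = { 3, 2, 1, 0 }, the returned mask is the
   VECTOR_CST { 3, 2, 1, 0 } built on a four-element ssizetype vector type.
   Used as the selector of a VEC_PERM_EXPR (with hypothetical SSA names)

     v_rev = VEC_PERM_EXPR <v, v, { 3, 2, 1, 0 }>;

   it reverses the elements of v.  */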
7338 tree
7339 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7341 tree mask_type;
7343 poly_uint64 nunits = sel.length ();
7344 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7346 mask_type = build_vector_type (ssizetype, nunits);
7347 return vec_perm_indices_to_tree (mask_type, sel);
7350 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7351 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7353 tree
7354 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7356 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7357 return vect_gen_perm_mask_any (vectype, sel);
7360 /* Given vector variables X and Y that were generated for the scalar
7361 STMT_INFO, generate instructions to permute the vector elements of X and Y
7362 using permutation mask MASK_VEC, insert them at *GSI and return the
7363 permuted vector variable. */
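/* For example (hypothetical operands): with X and Y of type vector(4) int and
   MASK_VEC = { 1, 3, 5, 7 }, the statement emitted at *GSI is

     perm_dest_1 = VEC_PERM_EXPR <X, Y, { 1, 3, 5, 7 }>;

   which selects the odd-indexed elements of the concatenation of X and Y.  */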
7365 static tree
7366 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
7367 gimple_stmt_iterator *gsi)
7369 tree vectype = TREE_TYPE (x);
7370 tree perm_dest, data_ref;
7371 gimple *perm_stmt;
7373 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
7374 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
7375 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7376 else
7377 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7378 data_ref = make_ssa_name (perm_dest);
7380 /* Generate the permute statement. */
7381 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7382 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7384 return data_ref;
7387 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7388 inserting them on the loop's preheader edge. Returns true if we
7389 were successful in doing so (and thus STMT_INFO can then be moved),
7390 otherwise returns false. */
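/* A sketch of the intended use, with hypothetical statements: if STMT_INFO is
   the invariant load

     x_2 = *p_1;

   and p_1 is defined inside LOOP by

     p_1 = base_3 + 16;

   where base_3 is defined outside the loop, then the definition of p_1 is
   moved to the preheader edge and the load itself can afterwards be hoisted
   by the caller.  */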
7392 static bool
7393 hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
7395 ssa_op_iter i;
7396 tree op;
7397 bool any = false;
7399 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7401 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7402 if (!gimple_nop_p (def_stmt)
7403 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7405 /* Make sure we don't need to recurse. While we could do
7406 so in simple cases, for more complex use webs we don't
7407 have an easy way to preserve stmt order to fulfil
7408 dependencies within them. */
7409 tree op2;
7410 ssa_op_iter i2;
7411 if (gimple_code (def_stmt) == GIMPLE_PHI)
7412 return false;
7413 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7415 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7416 if (!gimple_nop_p (def_stmt2)
7417 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7418 return false;
7420 any = true;
7424 if (!any)
7425 return true;
7427 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7429 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7430 if (!gimple_nop_p (def_stmt)
7431 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7433 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7434 gsi_remove (&gsi, false);
7435 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7439 return true;
7442 /* vectorizable_load.
7444 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
7445 that can be vectorized.
7446 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7447 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7448 Return true if STMT_INFO is vectorizable in this way. */
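/* An illustrative example with hypothetical names: in a loop vectorized with
   a vectorization factor of 4, the scalar load

     x_1 = a[i_2];

   is replaced in the simple contiguous case by a single MEM_REF of the
   vector type,

     vect_x_1.1 = MEM[(int *)vectp_a];

   Grouped, strided, gather and masked loads are handled by the more
   specialised paths below.  */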
7450 static bool
7451 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7452 stmt_vec_info *vec_stmt, slp_tree slp_node,
7453 slp_instance slp_node_instance,
7454 stmt_vector_for_cost *cost_vec)
7456 tree scalar_dest;
7457 tree vec_dest = NULL;
7458 tree data_ref = NULL;
7459 stmt_vec_info prev_stmt_info;
7460 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7461 struct loop *loop = NULL;
7462 struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
7463 bool nested_in_vect_loop = false;
7464 tree elem_type;
7465 tree new_temp;
7466 machine_mode mode;
7467 tree dummy;
7468 enum dr_alignment_support alignment_support_scheme;
7469 tree dataref_ptr = NULL_TREE;
7470 tree dataref_offset = NULL_TREE;
7471 gimple *ptr_incr = NULL;
7472 int ncopies;
7473 int i, j;
7474 unsigned int group_size;
7475 poly_uint64 group_gap_adj;
7476 tree msq = NULL_TREE, lsq;
7477 tree offset = NULL_TREE;
7478 tree byte_offset = NULL_TREE;
7479 tree realignment_token = NULL_TREE;
7480 gphi *phi = NULL;
7481 vec<tree> dr_chain = vNULL;
7482 bool grouped_load = false;
7483 stmt_vec_info first_stmt_info;
7484 stmt_vec_info first_stmt_info_for_drptr = NULL;
7485 bool compute_in_loop = false;
7486 struct loop *at_loop;
7487 int vec_num;
7488 bool slp = (slp_node != NULL);
7489 bool slp_perm = false;
7490 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7491 poly_uint64 vf;
7492 tree aggr_type;
7493 gather_scatter_info gs_info;
7494 vec_info *vinfo = stmt_info->vinfo;
7495 tree ref_type;
7496 enum vect_def_type mask_dt = vect_unknown_def_type;
7498 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7499 return false;
7501 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7502 && ! vec_stmt)
7503 return false;
7505 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7506 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7508 scalar_dest = gimple_assign_lhs (assign);
7509 if (TREE_CODE (scalar_dest) != SSA_NAME)
7510 return false;
7512 tree_code code = gimple_assign_rhs_code (assign);
7513 if (code != ARRAY_REF
7514 && code != BIT_FIELD_REF
7515 && code != INDIRECT_REF
7516 && code != COMPONENT_REF
7517 && code != IMAGPART_EXPR
7518 && code != REALPART_EXPR
7519 && code != MEM_REF
7520 && TREE_CODE_CLASS (code) != tcc_declaration)
7521 return false;
7523 else
7525 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7526 if (!call || !gimple_call_internal_p (call))
7527 return false;
7529 internal_fn ifn = gimple_call_internal_fn (call);
7530 if (!internal_load_fn_p (ifn))
7531 return false;
7533 scalar_dest = gimple_call_lhs (call);
7534 if (!scalar_dest)
7535 return false;
7537 if (slp_node != NULL)
7539 if (dump_enabled_p ())
7540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7541 "SLP of masked loads not supported.\n");
7542 return false;
7545 int mask_index = internal_fn_mask_index (ifn);
7546 if (mask_index >= 0)
7548 mask = gimple_call_arg (call, mask_index);
7549 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7550 &mask_vectype))
7551 return false;
7555 if (!STMT_VINFO_DATA_REF (stmt_info))
7556 return false;
7558 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7559 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7561 if (loop_vinfo)
7563 loop = LOOP_VINFO_LOOP (loop_vinfo);
7564 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
7565 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7567 else
7568 vf = 1;
7570 /* Multiple types in SLP are handled by creating the appropriate number of
7571 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7572 case of SLP. */
7573 if (slp)
7574 ncopies = 1;
7575 else
7576 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7578 gcc_assert (ncopies >= 1);
7580 /* FORNOW. This restriction should be relaxed. */
7581 if (nested_in_vect_loop && ncopies > 1)
7583 if (dump_enabled_p ())
7584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7585 "multiple types in nested loop.\n");
7586 return false;
7589 /* Invalidate assumptions made by dependence analysis when vectorization
7590 on the unrolled body effectively re-orders stmts. */
7591 if (ncopies > 1
7592 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7593 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7594 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7596 if (dump_enabled_p ())
7597 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7598 "cannot perform implicit CSE when unrolling "
7599 "with negative dependence distance\n");
7600 return false;
7603 elem_type = TREE_TYPE (vectype);
7604 mode = TYPE_MODE (vectype);
7606 /* FORNOW. In some cases we can vectorize even if the data type is not
7607 supported (e.g. data copies). */
7608 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7610 if (dump_enabled_p ())
7611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7612 "Aligned load, but unsupported type.\n");
7613 return false;
7616 /* Check if the load is a part of an interleaving chain. */
7617 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7619 grouped_load = true;
7620 /* FORNOW */
7621 gcc_assert (!nested_in_vect_loop);
7622 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7624 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7625 group_size = DR_GROUP_SIZE (first_stmt_info);
7627 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7628 slp_perm = true;
7630 /* Invalidate assumptions made by dependence analysis when vectorization
7631 on the unrolled body effectively re-orders stmts. */
7632 if (!PURE_SLP_STMT (stmt_info)
7633 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7634 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7635 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7637 if (dump_enabled_p ())
7638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7639 "cannot perform implicit CSE when performing "
7640 "group loads with negative dependence distance\n");
7641 return false;
7644 /* Similarly, when the stmt is a load that is both part of an SLP
7645 instance and a loop vectorized stmt via the same-dr mechanism,
7646 we have to give up. */
7647 if (DR_GROUP_SAME_DR_STMT (stmt_info)
7648 && (STMT_SLP_TYPE (stmt_info)
7649 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info))))
7651 if (dump_enabled_p ())
7652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7653 "conflicting SLP types for CSEd load\n");
7654 return false;
7657 else
7658 group_size = 1;
7660 vect_memory_access_type memory_access_type;
7661 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
7662 &memory_access_type, &gs_info))
7663 return false;
7665 if (mask)
7667 if (memory_access_type == VMAT_CONTIGUOUS)
7669 machine_mode vec_mode = TYPE_MODE (vectype);
7670 if (!VECTOR_MODE_P (vec_mode)
7671 || !can_vec_mask_load_store_p (vec_mode,
7672 TYPE_MODE (mask_vectype), true))
7673 return false;
7675 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7676 && memory_access_type != VMAT_GATHER_SCATTER)
7678 if (dump_enabled_p ())
7679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7680 "unsupported access type for masked load.\n");
7681 return false;
7685 if (!vec_stmt) /* transformation not required. */
7687 if (!slp)
7688 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7690 if (loop_vinfo
7691 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7692 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7693 memory_access_type, &gs_info);
7695 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7696 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7697 slp_node_instance, slp_node, cost_vec);
7698 return true;
7701 if (!slp)
7702 gcc_assert (memory_access_type
7703 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7705 if (dump_enabled_p ())
7706 dump_printf_loc (MSG_NOTE, vect_location,
7707 "transform load. ncopies = %d\n", ncopies);
7709 /* Transform. */
7711 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7712 ensure_base_align (dr_info);
7714 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7716 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
7717 return true;
7720 if (memory_access_type == VMAT_INVARIANT)
7722 gcc_assert (!grouped_load && !mask && !bb_vinfo);
7723 /* If we have versioned for aliasing or the loop doesn't
7724 have any data dependencies that would preclude this,
7725 then we are sure this is a loop invariant load and
7726 thus we can insert it on the preheader edge. */
7727 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7728 && !nested_in_vect_loop
7729 && hoist_defs_of_uses (stmt_info, loop));
7730 if (hoist_p)
7732 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
7733 if (dump_enabled_p ())
7734 dump_printf_loc (MSG_NOTE, vect_location,
7735 "hoisting out of the vectorized loop: %G", stmt);
7736 scalar_dest = copy_ssa_name (scalar_dest);
7737 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
7738 gsi_insert_on_edge_immediate
7739 (loop_preheader_edge (loop),
7740 gimple_build_assign (scalar_dest, rhs));
7742 /* These copies are all equivalent, but currently the representation
7743 requires a separate STMT_VINFO_VEC_STMT for each one. */
7744 prev_stmt_info = NULL;
7745 gimple_stmt_iterator gsi2 = *gsi;
7746 gsi_next (&gsi2);
7747 for (j = 0; j < ncopies; j++)
7749 stmt_vec_info new_stmt_info;
7750 if (hoist_p)
7752 new_temp = vect_init_vector (stmt_info, scalar_dest,
7753 vectype, NULL);
7754 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
7755 new_stmt_info = vinfo->add_stmt (new_stmt);
7757 else
7759 new_temp = vect_init_vector (stmt_info, scalar_dest,
7760 vectype, &gsi2);
7761 new_stmt_info = vinfo->lookup_def (new_temp);
7763 if (slp)
7764 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7765 else if (j == 0)
7766 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7767 else
7768 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7769 prev_stmt_info = new_stmt_info;
7771 return true;
7774 if (memory_access_type == VMAT_ELEMENTWISE
7775 || memory_access_type == VMAT_STRIDED_SLP)
7777 gimple_stmt_iterator incr_gsi;
7778 bool insert_after;
7779 gimple *incr;
7780 tree offvar;
7781 tree ivstep;
7782 tree running_off;
7783 vec<constructor_elt, va_gc> *v = NULL;
7784 tree stride_base, stride_step, alias_off;
7785 /* Checked by get_load_store_type. */
7786 unsigned int const_nunits = nunits.to_constant ();
7787 unsigned HOST_WIDE_INT cst_offset = 0;
7789 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7790 gcc_assert (!nested_in_vect_loop);
7792 if (grouped_load)
7794 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7795 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7797 else
7799 first_stmt_info = stmt_info;
7800 first_dr_info = dr_info;
7802 if (slp && grouped_load)
7804 group_size = DR_GROUP_SIZE (first_stmt_info);
7805 ref_type = get_group_alias_ptr_type (first_stmt_info);
7807 else
7809 if (grouped_load)
7810 cst_offset
7811 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7812 * vect_get_place_in_interleaving_chain (stmt_info,
7813 first_stmt_info));
7814 group_size = 1;
7815 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7818 stride_base
7819 = fold_build_pointer_plus
7820 (DR_BASE_ADDRESS (first_dr_info->dr),
7821 size_binop (PLUS_EXPR,
7822 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7823 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7824 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7826 /* For a load with loop-invariant (but other than power-of-2)
7827 stride (i.e. not a grouped access) like so:
7829 for (i = 0; i < n; i += stride)
7830 ... = array[i];
7832 we generate a new induction variable and new accesses to
7833 form a new vector (or vectors, depending on ncopies):
7835 for (j = 0; ; j += VF*stride)
7836 tmp1 = array[j];
7837 tmp2 = array[j + stride];
7839 vectemp = {tmp1, tmp2, ...}
7842 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7843 build_int_cst (TREE_TYPE (stride_step), vf));
7845 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7847 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7848 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7849 create_iv (stride_base, ivstep, NULL,
7850 loop, &incr_gsi, insert_after,
7851 &offvar, NULL);
7852 incr = gsi_stmt (incr_gsi);
7853 loop_vinfo->add_stmt (incr);
7855 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7857 prev_stmt_info = NULL;
7858 running_off = offvar;
7859 alias_off = build_int_cst (ref_type, 0);
7860 int nloads = const_nunits;
7861 int lnel = 1;
7862 tree ltype = TREE_TYPE (vectype);
7863 tree lvectype = vectype;
7864 auto_vec<tree> dr_chain;
7865 if (memory_access_type == VMAT_STRIDED_SLP)
7867 if (group_size < const_nunits)
7869 /* First check if vec_init optab supports construction from
7870 vector elts directly. */
7871 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7872 machine_mode vmode;
7873 if (mode_for_vector (elmode, group_size).exists (&vmode)
7874 && VECTOR_MODE_P (vmode)
7875 && targetm.vector_mode_supported_p (vmode)
7876 && (convert_optab_handler (vec_init_optab,
7877 TYPE_MODE (vectype), vmode)
7878 != CODE_FOR_nothing))
7880 nloads = const_nunits / group_size;
7881 lnel = group_size;
7882 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7884 else
7886 /* Otherwise avoid emitting a constructor of vector elements
7887 by performing the loads using an integer type of the same
7888 size, constructing a vector of those and then
7889 re-interpreting it as the original vector type.
7890 This avoids a huge runtime penalty due to the general
7891 inability to perform store forwarding from smaller stores
7892 to a larger load. */
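/* For example (illustrative values): with a vector(8) char vectype and
   GROUP_SIZE == 2, each group is loaded as one 16-bit integer, the four
   integers are collected into a vector of four 16-bit integers, and that
   vector is view-converted back to the vector(8) char type further below.  */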
7893 unsigned lsize
7894 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7895 unsigned int lnunits = const_nunits / group_size;
7896 /* If we can't construct such a vector fall back to
7897 element loads of the original vector type. */
7898 if (int_mode_for_size (lsize, 0).exists (&elmode)
7899 && mode_for_vector (elmode, lnunits).exists (&vmode)
7900 && VECTOR_MODE_P (vmode)
7901 && targetm.vector_mode_supported_p (vmode)
7902 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7903 != CODE_FOR_nothing))
7905 nloads = lnunits;
7906 lnel = group_size;
7907 ltype = build_nonstandard_integer_type (lsize, 1);
7908 lvectype = build_vector_type (ltype, nloads);
7912 else
7914 nloads = 1;
7915 lnel = const_nunits;
7916 ltype = vectype;
7918 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7920 /* Load vector(1) scalar_type if the vectype is a single-element vector. */
7921 else if (nloads == 1)
7922 ltype = vectype;
7924 if (slp)
7926 /* For SLP permutation support we need to load the whole group,
7927 not only the number of vector stmts the permutation result
7928 fits in. */
7929 if (slp_perm)
7931 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7932 variable VF. */
7933 unsigned int const_vf = vf.to_constant ();
7934 ncopies = CEIL (group_size * const_vf, const_nunits);
7935 dr_chain.create (ncopies);
7937 else
7938 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7940 unsigned int group_el = 0;
7941 unsigned HOST_WIDE_INT
7942 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7943 for (j = 0; j < ncopies; j++)
7945 if (nloads > 1)
7946 vec_alloc (v, nloads);
7947 stmt_vec_info new_stmt_info = NULL;
7948 for (i = 0; i < nloads; i++)
7950 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7951 group_el * elsz + cst_offset);
7952 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7953 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7954 gassign *new_stmt
7955 = gimple_build_assign (make_ssa_name (ltype), data_ref);
7956 new_stmt_info
7957 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7958 if (nloads > 1)
7959 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7960 gimple_assign_lhs (new_stmt));
7962 group_el += lnel;
7963 if (! slp
7964 || group_el == group_size)
7966 tree newoff = copy_ssa_name (running_off);
7967 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7968 running_off, stride_step);
7969 vect_finish_stmt_generation (stmt_info, incr, gsi);
7971 running_off = newoff;
7972 group_el = 0;
7975 if (nloads > 1)
7977 tree vec_inv = build_constructor (lvectype, v);
7978 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
7979 new_stmt_info = vinfo->lookup_def (new_temp);
7980 if (lvectype != vectype)
7982 gassign *new_stmt
7983 = gimple_build_assign (make_ssa_name (vectype),
7984 VIEW_CONVERT_EXPR,
7985 build1 (VIEW_CONVERT_EXPR,
7986 vectype, new_temp));
7987 new_stmt_info
7988 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7992 if (slp)
7994 if (slp_perm)
7995 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
7996 else
7997 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7999 else
8001 if (j == 0)
8002 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8003 else
8004 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8005 prev_stmt_info = new_stmt_info;
8008 if (slp_perm)
8010 unsigned n_perms;
8011 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8012 slp_node_instance, false, &n_perms);
8014 return true;
8017 if (memory_access_type == VMAT_GATHER_SCATTER
8018 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8019 grouped_load = false;
8021 if (grouped_load)
8023 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8024 group_size = DR_GROUP_SIZE (first_stmt_info);
8025 /* For SLP vectorization we directly vectorize a subchain
8026 without permutation. */
8027 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8028 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8029 /* For BB vectorization always use the first stmt to base
8030 the data ref pointer on. */
8031 if (bb_vinfo)
8032 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8034 /* Check if the chain of loads is already vectorized. */
8035 if (STMT_VINFO_VEC_STMT (first_stmt_info)
8036 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8037 ??? But we can only do so if there is exactly one
8038 as we have no way to get at the rest. Leave the CSE
8039 opportunity alone.
8040 ??? With the group load eventually participating
8041 in multiple different permutations (having multiple
8042 slp nodes which refer to the same group) the CSE
8043 is even wrong code. See PR56270. */
8044 && !slp)
8046 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8047 return true;
8049 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8050 group_gap_adj = 0;
8052 /* VEC_NUM is the number of vect stmts to be created for this group. */
8053 if (slp)
8055 grouped_load = false;
8056 /* If an SLP permutation is from N elements to N elements,
8057 and if one vector holds a whole number of N, we can load
8058 the inputs to the permutation in the same way as an
8059 unpermuted sequence. In other cases we need to load the
8060 whole group, not only the number of vector stmts the
8061 permutation result fits in. */
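/* For example (illustrative): with a group of two loads and a four-element
   vector type, every vector covers whole groups and the inputs can be loaded
   as an unpermuted sequence; with a group of three loads the whole group has
   to be loaded and GROUP_GAP_ADJ below accounts for the extra elements.  */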
8062 if (slp_perm
8063 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
8064 || !multiple_p (nunits, group_size)))
8066 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8067 variable VF; see vect_transform_slp_perm_load. */
8068 unsigned int const_vf = vf.to_constant ();
8069 unsigned int const_nunits = nunits.to_constant ();
8070 vec_num = CEIL (group_size * const_vf, const_nunits);
8071 group_gap_adj = vf * group_size - nunits * vec_num;
8073 else
8075 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8076 group_gap_adj
8077 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
8080 else
8081 vec_num = group_size;
8083 ref_type = get_group_alias_ptr_type (first_stmt_info);
8085 else
8087 first_stmt_info = stmt_info;
8088 first_dr_info = dr_info;
8089 group_size = vec_num = 1;
8090 group_gap_adj = 0;
8091 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8094 alignment_support_scheme
8095 = vect_supportable_dr_alignment (first_dr_info, false);
8096 gcc_assert (alignment_support_scheme);
8097 vec_loop_masks *loop_masks
8098 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8099 ? &LOOP_VINFO_MASKS (loop_vinfo)
8100 : NULL);
8101 /* Targets with load-lane instructions must not require explicit
8102 realignment. vect_supportable_dr_alignment always returns either
8103 dr_aligned or dr_unaligned_supported for masked operations. */
8104 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8105 && !mask
8106 && !loop_masks)
8107 || alignment_support_scheme == dr_aligned
8108 || alignment_support_scheme == dr_unaligned_supported);
8110 /* In case the vectorization factor (VF) is bigger than the number
8111 of elements that we can fit in a vectype (nunits), we have to generate
8112 more than one vector stmt - i.e. we need to "unroll" the
8113 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8114 from one copy of the vector stmt to the next, in the field
8115 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8116 stages to find the correct vector defs to be used when vectorizing
8117 stmts that use the defs of the current stmt. The example below
8118 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8119 need to create 4 vectorized stmts):
8121 before vectorization:
8122 RELATED_STMT VEC_STMT
8123 S1: x = memref - -
8124 S2: z = x + 1 - -
8126 step 1: vectorize stmt S1:
8127 We first create the vector stmt VS1_0, and, as usual, record a
8128 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8129 Next, we create the vector stmt VS1_1, and record a pointer to
8130 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8131 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8132 stmts and pointers:
8133 RELATED_STMT VEC_STMT
8134 VS1_0: vx0 = memref0 VS1_1 -
8135 VS1_1: vx1 = memref1 VS1_2 -
8136 VS1_2: vx2 = memref2 VS1_3 -
8137 VS1_3: vx3 = memref3 - -
8138 S1: x = load - VS1_0
8139 S2: z = x + 1 - -
8141 See the documentation of vect_get_vec_def_for_stmt_copy for how the
8142 information we recorded in the RELATED_STMT field is used to vectorize
8143 stmt S2. */
8145 /* In case of interleaving (non-unit grouped access):
8147 S1: x2 = &base + 2
8148 S2: x0 = &base
8149 S3: x1 = &base + 1
8150 S4: x3 = &base + 3
8152 Vectorized loads are created in the order of memory accesses
8153 starting from the access of the first stmt of the chain:
8155 VS1: vx0 = &base
8156 VS2: vx1 = &base + vec_size*1
8157 VS3: vx2 = &base + vec_size*2
8158 VS4: vx3 = &base + vec_size*3
8160 Then permutation statements are generated:
8162 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8163 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8166 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8167 (the order of the data-refs in the output of vect_permute_load_chain
8168 corresponds to the order of scalar stmts in the interleaving chain - see
8169 the documentation of vect_permute_load_chain()).
8170 The generation of permutation stmts and recording them in
8171 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8173 In case of both multiple types and interleaving, the vector loads and
8174 permutation stmts above are created for every copy. The result vector
8175 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8176 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8178 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8179 on a target that supports unaligned accesses (dr_unaligned_supported)
8180 we generate the following code:
8181 p = initial_addr;
8182 indx = 0;
8183 loop {
8184 p = p + indx * vectype_size;
8185 vec_dest = *(p);
8186 indx = indx + 1;
8189 Otherwise, the data reference is potentially unaligned on a target that
8190 does not support unaligned accesses (dr_explicit_realign_optimized) -
8191 then generate the following code, in which the data in each iteration is
8192 obtained by two vector loads, one from the previous iteration, and one
8193 from the current iteration:
8194 p1 = initial_addr;
8195 msq_init = *(floor(p1))
8196 p2 = initial_addr + VS - 1;
8197 realignment_token = call target_builtin;
8198 indx = 0;
8199 loop {
8200 p2 = p2 + indx * vectype_size
8201 lsq = *(floor(p2))
8202 vec_dest = realign_load (msq, lsq, realignment_token)
8203 indx = indx + 1;
8204 msq = lsq;
8205 } */
8207 /* If the misalignment remains the same throughout the execution of the
8208 loop, we can create the init_addr and permutation mask at the loop
8209 preheader. Otherwise, they need to be created inside the loop.
8210 This can only occur when vectorizing memory accesses in the inner-loop
8211 nested within an outer-loop that is being vectorized. */
8213 if (nested_in_vect_loop
8214 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
8215 GET_MODE_SIZE (TYPE_MODE (vectype))))
8217 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8218 compute_in_loop = true;
8221 if ((alignment_support_scheme == dr_explicit_realign_optimized
8222 || alignment_support_scheme == dr_explicit_realign)
8223 && !compute_in_loop)
8225 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
8226 alignment_support_scheme, NULL_TREE,
8227 &at_loop);
8228 if (alignment_support_scheme == dr_explicit_realign_optimized)
8230 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8231 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8232 size_one_node);
8235 else
8236 at_loop = loop;
8238 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8239 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8241 tree bump;
8242 tree vec_offset = NULL_TREE;
8243 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8245 aggr_type = NULL_TREE;
8246 bump = NULL_TREE;
8248 else if (memory_access_type == VMAT_GATHER_SCATTER)
8250 aggr_type = elem_type;
8251 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8252 &bump, &vec_offset);
8254 else
8256 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8257 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8258 else
8259 aggr_type = vectype;
8260 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8261 memory_access_type);
8264 tree vec_mask = NULL_TREE;
8265 prev_stmt_info = NULL;
8266 poly_uint64 group_elt = 0;
8267 for (j = 0; j < ncopies; j++)
8269 stmt_vec_info new_stmt_info = NULL;
8270 /* 1. Create the vector or array pointer update chain. */
8271 if (j == 0)
8273 bool simd_lane_access_p
8274 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8275 if (simd_lane_access_p
8276 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8277 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8278 && integer_zerop (DR_OFFSET (first_dr_info->dr))
8279 && integer_zerop (DR_INIT (first_dr_info->dr))
8280 && alias_sets_conflict_p (get_alias_set (aggr_type),
8281 get_alias_set (TREE_TYPE (ref_type)))
8282 && (alignment_support_scheme == dr_aligned
8283 || alignment_support_scheme == dr_unaligned_supported))
8285 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8286 dataref_offset = build_int_cst (ref_type, 0);
8288 else if (first_stmt_info_for_drptr
8289 && first_stmt_info != first_stmt_info_for_drptr)
8291 dataref_ptr
8292 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8293 aggr_type, at_loop, offset, &dummy,
8294 gsi, &ptr_incr, simd_lane_access_p,
8295 byte_offset, bump);
8296 /* Adjust the pointer by the difference to first_stmt. */
8297 data_reference_p ptrdr
8298 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
8299 tree diff
8300 = fold_convert (sizetype,
8301 size_binop (MINUS_EXPR,
8302 DR_INIT (first_dr_info->dr),
8303 DR_INIT (ptrdr)));
8304 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8305 stmt_info, diff);
8307 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8308 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8309 &dataref_ptr, &vec_offset);
8310 else
8311 dataref_ptr
8312 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
8313 offset, &dummy, gsi, &ptr_incr,
8314 simd_lane_access_p,
8315 byte_offset, bump);
8316 if (mask)
8317 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8318 mask_vectype);
8320 else
8322 if (dataref_offset)
8323 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8324 bump);
8325 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8326 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8327 else
8328 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8329 stmt_info, bump);
8330 if (mask)
8331 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8334 if (grouped_load || slp_perm)
8335 dr_chain.create (vec_num);
8337 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8339 tree vec_array;
8341 vec_array = create_vector_array (vectype, vec_num);
8343 tree final_mask = NULL_TREE;
8344 if (loop_masks)
8345 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8346 vectype, j);
8347 if (vec_mask)
8348 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8349 vec_mask, gsi);
8351 gcall *call;
8352 if (final_mask)
8354 /* Emit:
8355 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8356 VEC_MASK). */
8357 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8358 tree alias_ptr = build_int_cst (ref_type, align);
8359 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8360 dataref_ptr, alias_ptr,
8361 final_mask);
8363 else
8365 /* Emit:
8366 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8367 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8368 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8370 gimple_call_set_lhs (call, vec_array);
8371 gimple_call_set_nothrow (call, true);
8372 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8374 /* Extract each vector into an SSA_NAME. */
8375 for (i = 0; i < vec_num; i++)
8377 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
8378 vec_array, i);
8379 dr_chain.quick_push (new_temp);
8382 /* Record the mapping between SSA_NAMEs and statements. */
8383 vect_record_grouped_load_vectors (stmt_info, dr_chain);
8385 /* Record that VEC_ARRAY is now dead. */
8386 vect_clobber_variable (stmt_info, gsi, vec_array);
8388 else
8390 for (i = 0; i < vec_num; i++)
8392 tree final_mask = NULL_TREE;
8393 if (loop_masks
8394 && memory_access_type != VMAT_INVARIANT)
8395 final_mask = vect_get_loop_mask (gsi, loop_masks,
8396 vec_num * ncopies,
8397 vectype, vec_num * j + i);
8398 if (vec_mask)
8399 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8400 vec_mask, gsi);
8402 if (i > 0)
8403 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8404 stmt_info, bump);
8406 /* 2. Create the vector-load in the loop. */
8407 gimple *new_stmt = NULL;
8408 switch (alignment_support_scheme)
8410 case dr_aligned:
8411 case dr_unaligned_supported:
8413 unsigned int misalign;
8414 unsigned HOST_WIDE_INT align;
8416 if (memory_access_type == VMAT_GATHER_SCATTER)
8418 tree scale = size_int (gs_info.scale);
8419 gcall *call;
8420 if (loop_masks)
8421 call = gimple_build_call_internal
8422 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8423 vec_offset, scale, final_mask);
8424 else
8425 call = gimple_build_call_internal
8426 (IFN_GATHER_LOAD, 3, dataref_ptr,
8427 vec_offset, scale);
8428 gimple_call_set_nothrow (call, true);
8429 new_stmt = call;
8430 data_ref = NULL_TREE;
8431 break;
8434 align =
8435 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8436 if (alignment_support_scheme == dr_aligned)
8438 gcc_assert (aligned_access_p (first_dr_info));
8439 misalign = 0;
8441 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8443 align = dr_alignment
8444 (vect_dr_behavior (first_dr_info));
8445 misalign = 0;
8447 else
8448 misalign = DR_MISALIGNMENT (first_dr_info);
8449 if (dataref_offset == NULL_TREE
8450 && TREE_CODE (dataref_ptr) == SSA_NAME)
8451 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8452 align, misalign);
8454 if (final_mask)
8456 align = least_bit_hwi (misalign | align);
8457 tree ptr = build_int_cst (ref_type, align);
8458 gcall *call
8459 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8460 dataref_ptr, ptr,
8461 final_mask);
8462 gimple_call_set_nothrow (call, true);
8463 new_stmt = call;
8464 data_ref = NULL_TREE;
8466 else
8468 data_ref
8469 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8470 dataref_offset
8471 ? dataref_offset
8472 : build_int_cst (ref_type, 0));
8473 if (alignment_support_scheme == dr_aligned)
8475 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8476 TREE_TYPE (data_ref)
8477 = build_aligned_type (TREE_TYPE (data_ref),
8478 align * BITS_PER_UNIT);
8479 else
8480 TREE_TYPE (data_ref)
8481 = build_aligned_type (TREE_TYPE (data_ref),
8482 TYPE_ALIGN (elem_type));
8484 break;
8486 case dr_explicit_realign:
8488 tree ptr, bump;
8490 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8492 if (compute_in_loop)
8493 msq = vect_setup_realignment (first_stmt_info, gsi,
8494 &realignment_token,
8495 dr_explicit_realign,
8496 dataref_ptr, NULL);
8498 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8499 ptr = copy_ssa_name (dataref_ptr);
8500 else
8501 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8502 // For explicit realign the target alignment should be
8503 // known at compile time.
8504 unsigned HOST_WIDE_INT align =
8505 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8506 new_stmt = gimple_build_assign
8507 (ptr, BIT_AND_EXPR, dataref_ptr,
8508 build_int_cst
8509 (TREE_TYPE (dataref_ptr),
8510 -(HOST_WIDE_INT) align));
8511 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8512 data_ref
8513 = build2 (MEM_REF, vectype, ptr,
8514 build_int_cst (ref_type, 0));
8515 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8516 vec_dest = vect_create_destination_var (scalar_dest,
8517 vectype);
8518 new_stmt = gimple_build_assign (vec_dest, data_ref);
8519 new_temp = make_ssa_name (vec_dest, new_stmt);
8520 gimple_assign_set_lhs (new_stmt, new_temp);
8521 gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
8522 gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
8523 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8524 msq = new_temp;
8526 bump = size_binop (MULT_EXPR, vs,
8527 TYPE_SIZE_UNIT (elem_type));
8528 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8529 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
8530 stmt_info, bump);
8531 new_stmt = gimple_build_assign
8532 (NULL_TREE, BIT_AND_EXPR, ptr,
8533 build_int_cst
8534 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8535 ptr = copy_ssa_name (ptr, new_stmt);
8536 gimple_assign_set_lhs (new_stmt, ptr);
8537 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8538 data_ref
8539 = build2 (MEM_REF, vectype, ptr,
8540 build_int_cst (ref_type, 0));
8541 break;
8543 case dr_explicit_realign_optimized:
8545 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8546 new_temp = copy_ssa_name (dataref_ptr);
8547 else
8548 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8549 // We should only be doing this if we know the target
8550 // alignment at compile time.
8551 unsigned HOST_WIDE_INT align =
8552 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8553 new_stmt = gimple_build_assign
8554 (new_temp, BIT_AND_EXPR, dataref_ptr,
8555 build_int_cst (TREE_TYPE (dataref_ptr),
8556 -(HOST_WIDE_INT) align));
8557 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8558 data_ref
8559 = build2 (MEM_REF, vectype, new_temp,
8560 build_int_cst (ref_type, 0));
8561 break;
8563 default:
8564 gcc_unreachable ();
8566 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8567 /* DATA_REF is null if we've already built the statement. */
8568 if (data_ref)
8570 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8571 new_stmt = gimple_build_assign (vec_dest, data_ref);
8573 new_temp = make_ssa_name (vec_dest, new_stmt);
8574 gimple_set_lhs (new_stmt, new_temp);
8575 new_stmt_info
8576 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8578 /* 3. Handle explicit realignment if necessary/supported.
8579 Create in loop:
8580 vec_dest = realign_load (msq, lsq, realignment_token) */
8581 if (alignment_support_scheme == dr_explicit_realign_optimized
8582 || alignment_support_scheme == dr_explicit_realign)
8584 lsq = gimple_assign_lhs (new_stmt);
8585 if (!realignment_token)
8586 realignment_token = dataref_ptr;
8587 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8588 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8589 msq, lsq, realignment_token);
8590 new_temp = make_ssa_name (vec_dest, new_stmt);
8591 gimple_assign_set_lhs (new_stmt, new_temp);
8592 new_stmt_info
8593 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8595 if (alignment_support_scheme == dr_explicit_realign_optimized)
8597 gcc_assert (phi);
8598 if (i == vec_num - 1 && j == ncopies - 1)
8599 add_phi_arg (phi, lsq,
8600 loop_latch_edge (containing_loop),
8601 UNKNOWN_LOCATION);
8602 msq = lsq;
8606 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8608 tree perm_mask = perm_mask_for_reverse (vectype);
8609 new_temp = permute_vec_elements (new_temp, new_temp,
8610 perm_mask, stmt_info, gsi);
8611 new_stmt_info = vinfo->lookup_def (new_temp);
8614 /* Collect vector loads and later create their permutation in
8615 vect_transform_grouped_load (). */
8616 if (grouped_load || slp_perm)
8617 dr_chain.quick_push (new_temp);
8619 /* Store vector loads in the corresponding SLP_NODE. */
8620 if (slp && !slp_perm)
8621 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8623 /* With an SLP permutation we load the gaps as well; without one
8624 we need to skip the gaps after we manage to fully load
8625 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8626 group_elt += nunits;
8627 if (maybe_ne (group_gap_adj, 0U)
8628 && !slp_perm
8629 && known_eq (group_elt, group_size - group_gap_adj))
8631 poly_wide_int bump_val
8632 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8633 * group_gap_adj);
8634 tree bump = wide_int_to_tree (sizetype, bump_val);
8635 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8636 stmt_info, bump);
8637 group_elt = 0;
8640 /* Bump the vector pointer to account for a gap or for excess
8641 elements loaded for a permuted SLP load. */
8642 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8644 poly_wide_int bump_val
8645 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8646 * group_gap_adj);
8647 tree bump = wide_int_to_tree (sizetype, bump_val);
8648 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8649 stmt_info, bump);
8653 if (slp && !slp_perm)
8654 continue;
8656 if (slp_perm)
8658 unsigned n_perms;
8659 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8660 slp_node_instance, false,
8661 &n_perms))
8663 dr_chain.release ();
8664 return false;
8667 else
8669 if (grouped_load)
8671 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8672 vect_transform_grouped_load (stmt_info, dr_chain,
8673 group_size, gsi);
8674 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8676 else
8678 if (j == 0)
8679 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8680 else
8681 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8682 prev_stmt_info = new_stmt_info;
8685 dr_chain.release ();
8688 return true;
8691 /* Function vect_is_simple_cond.
8693 Input:
8694 VINFO - the vectorization info of the loop or basic block being vectorized.
8695 COND - Condition that is checked for simple use.
8697 Output:
8698 *COMP_VECTYPE - the vector type for the comparison.
8699 *DTS - The def types for the arguments of the comparison.
8701 Returns whether a COND can be vectorized. Checks whether
8702 condition operands are supportable using vect_is_simple_use. */
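/* For example (hypothetical operands): for COND = a_1 < b_2 with two
   vectorizable integer operands, *COMP_VECTYPE becomes the vector type of the
   operands and *DTS records their def types; for COND = x_3 < 5 the constant
   operand is recorded as vect_constant_def; and if neither operand provides a
   vector type (e.g. both are constants), *COMP_VECTYPE is derived from
   VECTYPE instead.  */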
8704 static bool
8705 vect_is_simple_cond (tree cond, vec_info *vinfo,
8706 tree *comp_vectype, enum vect_def_type *dts,
8707 tree vectype)
8709 tree lhs, rhs;
8710 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8712 /* Mask case. */
8713 if (TREE_CODE (cond) == SSA_NAME
8714 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8716 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8717 || !*comp_vectype
8718 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8719 return false;
8720 return true;
8723 if (!COMPARISON_CLASS_P (cond))
8724 return false;
8726 lhs = TREE_OPERAND (cond, 0);
8727 rhs = TREE_OPERAND (cond, 1);
8729 if (TREE_CODE (lhs) == SSA_NAME)
8731 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8732 return false;
8734 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8735 || TREE_CODE (lhs) == FIXED_CST)
8736 dts[0] = vect_constant_def;
8737 else
8738 return false;
8740 if (TREE_CODE (rhs) == SSA_NAME)
8742 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8743 return false;
8745 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8746 || TREE_CODE (rhs) == FIXED_CST)
8747 dts[1] = vect_constant_def;
8748 else
8749 return false;
8751 if (vectype1 && vectype2
8752 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8753 TYPE_VECTOR_SUBPARTS (vectype2)))
8754 return false;
8756 *comp_vectype = vectype1 ? vectype1 : vectype2;
8757 /* Invariant comparison. */
8758 if (! *comp_vectype && vectype)
8760 tree scalar_type = TREE_TYPE (lhs);
8761 /* If we can widen the comparison to match vectype do so. */
8762 if (INTEGRAL_TYPE_P (scalar_type)
8763 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8764 TYPE_SIZE (TREE_TYPE (vectype))))
8765 scalar_type = build_nonstandard_integer_type
8766 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8767 TYPE_UNSIGNED (scalar_type));
8768 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8771 return true;
8774 /* vectorizable_condition.
8776 Check if STMT_INFO is a conditional modify expression that can be vectorized.
8777 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8778 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8779 at GSI.
8781 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
8783 Return true if STMT_INFO is vectorizable in this way. */
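/* For example (hypothetical GIMPLE): the scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is vectorized in the unmasked, non-reduction case as

     vect_x_1.0 = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;

   where the comparison is evaluated element-wise in the mask type derived
   from the comparison operands.  */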
8785 bool
8786 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8787 stmt_vec_info *vec_stmt, bool for_reduction,
8788 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
8790 vec_info *vinfo = stmt_info->vinfo;
8791 tree scalar_dest = NULL_TREE;
8792 tree vec_dest = NULL_TREE;
8793 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8794 tree then_clause, else_clause;
8795 tree comp_vectype = NULL_TREE;
8796 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8797 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8798 tree vec_compare;
8799 tree new_temp;
8800 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8801 enum vect_def_type dts[4]
8802 = {vect_unknown_def_type, vect_unknown_def_type,
8803 vect_unknown_def_type, vect_unknown_def_type};
8804 int ndts = 4;
8805 int ncopies;
8806 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8807 stmt_vec_info prev_stmt_info = NULL;
8808 int i, j;
8809 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8810 vec<tree> vec_oprnds0 = vNULL;
8811 vec<tree> vec_oprnds1 = vNULL;
8812 vec<tree> vec_oprnds2 = vNULL;
8813 vec<tree> vec_oprnds3 = vNULL;
8814 tree vec_cmp_type;
8815 bool masked = false;
8817 if (for_reduction && STMT_SLP_TYPE (stmt_info))
8818 return false;
8820 vect_reduction_type reduction_type
8821 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8822 if (reduction_type == TREE_CODE_REDUCTION)
8824 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8825 return false;
8827 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8828 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8829 && for_reduction))
8830 return false;
8832 /* FORNOW: not yet supported. */
8833 if (STMT_VINFO_LIVE_P (stmt_info))
8835 if (dump_enabled_p ())
8836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8837 "value used after loop.\n");
8838 return false;
8842 /* Is this a vectorizable conditional operation? */
8843 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
8844 if (!stmt)
8845 return false;
8847 code = gimple_assign_rhs_code (stmt);
8849 if (code != COND_EXPR)
8850 return false;
8852 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8853 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8855 if (slp_node)
8856 ncopies = 1;
8857 else
8858 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8860 gcc_assert (ncopies >= 1);
8861 if (for_reduction && ncopies > 1)
8862 return false; /* FORNOW */
8864 cond_expr = gimple_assign_rhs1 (stmt);
8865 then_clause = gimple_assign_rhs2 (stmt);
8866 else_clause = gimple_assign_rhs3 (stmt);
8868 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8869 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8870 || !comp_vectype)
8871 return false;
8873 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8874 return false;
8875 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8876 return false;
8878 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8879 return false;
8881 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8882 return false;
8884 masked = !COMPARISON_CLASS_P (cond_expr);
8885 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8887 if (vec_cmp_type == NULL_TREE)
8888 return false;
8890 cond_code = TREE_CODE (cond_expr);
8891 if (!masked)
8893 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8894 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8897 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8899 /* Boolean values may have another representation in vectors
8900 and therefore we prefer bit operations over comparison for
8901 them (which also works for scalar masks). We store opcodes
8902 to use in bitop1 and bitop2. The statement is vectorized as
8903 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8904 depending on bitop1 and bitop2 arity. */
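/* For example (boolean operands only): GT_EXPR is rewritten as
   rhs1 & ~rhs2 (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR) and GE_EXPR
   as rhs1 | ~rhs2, as set up in the switch below.  */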
8905 switch (cond_code)
8907 case GT_EXPR:
8908 bitop1 = BIT_NOT_EXPR;
8909 bitop2 = BIT_AND_EXPR;
8910 break;
8911 case GE_EXPR:
8912 bitop1 = BIT_NOT_EXPR;
8913 bitop2 = BIT_IOR_EXPR;
8914 break;
8915 case LT_EXPR:
8916 bitop1 = BIT_NOT_EXPR;
8917 bitop2 = BIT_AND_EXPR;
8918 std::swap (cond_expr0, cond_expr1);
8919 break;
8920 case LE_EXPR:
8921 bitop1 = BIT_NOT_EXPR;
8922 bitop2 = BIT_IOR_EXPR;
8923 std::swap (cond_expr0, cond_expr1);
8924 break;
8925 case NE_EXPR:
8926 bitop1 = BIT_XOR_EXPR;
8927 break;
8928 case EQ_EXPR:
8929 bitop1 = BIT_XOR_EXPR;
8930 bitop2 = BIT_NOT_EXPR;
8931 break;
8932 default:
8933 return false;
8935 cond_code = SSA_NAME;
8938 if (!vec_stmt)
8940 if (bitop1 != NOP_EXPR)
8942 machine_mode mode = TYPE_MODE (comp_vectype);
8943 optab optab;
8945 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8946 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8947 return false;
8949 if (bitop2 != NOP_EXPR)
8951 optab = optab_for_tree_code (bitop2, comp_vectype,
8952 optab_default);
8953 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8954 return false;
8957 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8958 cond_code))
8960 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8961 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8962 cost_vec);
8963 return true;
8965 return false;
8968 /* Transform. */
8970 if (!slp_node)
8972 vec_oprnds0.create (1);
8973 vec_oprnds1.create (1);
8974 vec_oprnds2.create (1);
8975 vec_oprnds3.create (1);
8978 /* Handle def. */
8979 scalar_dest = gimple_assign_lhs (stmt);
8980 if (reduction_type != EXTRACT_LAST_REDUCTION)
8981 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8983 /* Handle cond expr. */
8984 for (j = 0; j < ncopies; j++)
8986 stmt_vec_info new_stmt_info = NULL;
8987 if (j == 0)
8989 if (slp_node)
8991 auto_vec<tree, 4> ops;
8992 auto_vec<vec<tree>, 4> vec_defs;
8994 if (masked)
8995 ops.safe_push (cond_expr);
8996 else
8998 ops.safe_push (cond_expr0);
8999 ops.safe_push (cond_expr1);
9001 ops.safe_push (then_clause);
9002 ops.safe_push (else_clause);
9003 vect_get_slp_defs (ops, slp_node, &vec_defs);
9004 vec_oprnds3 = vec_defs.pop ();
9005 vec_oprnds2 = vec_defs.pop ();
9006 if (!masked)
9007 vec_oprnds1 = vec_defs.pop ();
9008 vec_oprnds0 = vec_defs.pop ();
9010 else
9012 if (masked)
9014 vec_cond_lhs
9015 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
9016 comp_vectype);
9018 else
9020 vec_cond_lhs
9021 = vect_get_vec_def_for_operand (cond_expr0,
9022 stmt_info, comp_vectype);
9023 vec_cond_rhs
9024 = vect_get_vec_def_for_operand (cond_expr1,
9025 stmt_info, comp_vectype);
9027 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
9028 stmt_info);
9029 if (reduction_type != EXTRACT_LAST_REDUCTION)
9030 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
9031 stmt_info);
9034 else
9036 vec_cond_lhs
9037 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
9038 if (!masked)
9039 vec_cond_rhs
9040 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
9042 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
9043 vec_oprnds2.pop ());
9044 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
9045 vec_oprnds3.pop ());
9048 if (!slp_node)
9050 vec_oprnds0.quick_push (vec_cond_lhs);
9051 if (!masked)
9052 vec_oprnds1.quick_push (vec_cond_rhs);
9053 vec_oprnds2.quick_push (vec_then_clause);
9054 vec_oprnds3.quick_push (vec_else_clause);
9057 /* Arguments are ready. Create the new vector stmt. */
9058 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
9060 vec_then_clause = vec_oprnds2[i];
9061 vec_else_clause = vec_oprnds3[i];
9063 if (masked)
9064 vec_compare = vec_cond_lhs;
9065 else
9067 vec_cond_rhs = vec_oprnds1[i];
9068 if (bitop1 == NOP_EXPR)
9069 vec_compare = build2 (cond_code, vec_cmp_type,
9070 vec_cond_lhs, vec_cond_rhs);
9071 else
9073 new_temp = make_ssa_name (vec_cmp_type);
9074 gassign *new_stmt;
9075 if (bitop1 == BIT_NOT_EXPR)
9076 new_stmt = gimple_build_assign (new_temp, bitop1,
9077 vec_cond_rhs);
9078 else
9079 new_stmt
9080 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9081 vec_cond_rhs);
9082 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9083 if (bitop2 == NOP_EXPR)
9084 vec_compare = new_temp;
9085 else if (bitop2 == BIT_NOT_EXPR)
9087 /* Instead of doing ~x ? y : z do x ? z : y. */
9088 vec_compare = new_temp;
9089 std::swap (vec_then_clause, vec_else_clause);
9091 else
9093 vec_compare = make_ssa_name (vec_cmp_type);
9094 new_stmt
9095 = gimple_build_assign (vec_compare, bitop2,
9096 vec_cond_lhs, new_temp);
9097 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9101 if (reduction_type == EXTRACT_LAST_REDUCTION)
9103 if (!is_gimple_val (vec_compare))
9105 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9106 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9107 vec_compare);
9108 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9109 vec_compare = vec_compare_name;
9111 gcall *new_stmt = gimple_build_call_internal
9112 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9113 vec_then_clause);
9114 gimple_call_set_lhs (new_stmt, scalar_dest);
9115 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9116 if (stmt_info->stmt == gsi_stmt (*gsi))
9117 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
9118 else
9120 /* In this case we're moving the definition to later in the
9121 block. That doesn't matter because the only uses of the
9122 lhs are in phi statements. */
9123 gimple_stmt_iterator old_gsi
9124 = gsi_for_stmt (stmt_info->stmt);
9125 gsi_remove (&old_gsi, true);
9126 new_stmt_info
9127 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9130 else
9132 new_temp = make_ssa_name (vec_dest);
9133 gassign *new_stmt
9134 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9135 vec_then_clause, vec_else_clause);
9136 new_stmt_info
9137 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9139 if (slp_node)
9140 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9143 if (slp_node)
9144 continue;
9146 if (j == 0)
9147 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9148 else
9149 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9151 prev_stmt_info = new_stmt_info;
9154 vec_oprnds0.release ();
9155 vec_oprnds1.release ();
9156 vec_oprnds2.release ();
9157 vec_oprnds3.release ();
9159 return true;
9162 /* vectorizable_comparison.
9164 Check if STMT_INFO is a comparison expression that can be vectorized.
9165 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9166 comparison, put it in VEC_STMT, and insert it at GSI.
9168 Return true if STMT_INFO is vectorizable in this way. */
9170 static bool
9171 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9172 stmt_vec_info *vec_stmt,
9173 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9175 vec_info *vinfo = stmt_info->vinfo;
9176 tree lhs, rhs1, rhs2;
9177 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9178 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9179 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9180 tree new_temp;
9181 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9182 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9183 int ndts = 2;
9184 poly_uint64 nunits;
9185 int ncopies;
9186 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9187 stmt_vec_info prev_stmt_info = NULL;
9188 int i, j;
9189 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9190 vec<tree> vec_oprnds0 = vNULL;
9191 vec<tree> vec_oprnds1 = vNULL;
9192 tree mask_type;
9193 tree mask;
9195 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9196 return false;
9198 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9199 return false;
9201 mask_type = vectype;
9202 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9204 if (slp_node)
9205 ncopies = 1;
9206 else
9207 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9209 gcc_assert (ncopies >= 1);
9210 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9211 return false;
9213 if (STMT_VINFO_LIVE_P (stmt_info))
9215 if (dump_enabled_p ())
9216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9217 "value used after loop.\n");
9218 return false;
9221 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9222 if (!stmt)
9223 return false;
9225 code = gimple_assign_rhs_code (stmt);
9227 if (TREE_CODE_CLASS (code) != tcc_comparison)
9228 return false;
9230 rhs1 = gimple_assign_rhs1 (stmt);
9231 rhs2 = gimple_assign_rhs2 (stmt);
9233 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9234 return false;
9236 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9237 return false;
9239 if (vectype1 && vectype2
9240 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9241 TYPE_VECTOR_SUBPARTS (vectype2)))
9242 return false;
9244 vectype = vectype1 ? vectype1 : vectype2;
9246 /* Invariant comparison. */
9247 if (!vectype)
9249 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9250 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9251 return false;
9253 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9254 return false;
9256 /* Can't compare mask and non-mask types. */
9257 if (vectype1 && vectype2
9258 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9259 return false;
9261 /* Boolean values may have another representation in vectors
9262 and therefore we prefer bit operations over comparison for
9263 them (which also works for scalar masks). We store opcodes
9264 to use in bitop1 and bitop2. Statement is vectorized as
9265 BITOP2 (rhs1 BITOP1 rhs2) or
9266 rhs1 BITOP2 (BITOP1 rhs2)
9267 depending on bitop1 and bitop2 arity. */
9268 bool swap_p = false;
9269 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9271 if (code == GT_EXPR)
9273 bitop1 = BIT_NOT_EXPR;
9274 bitop2 = BIT_AND_EXPR;
9276 else if (code == GE_EXPR)
9278 bitop1 = BIT_NOT_EXPR;
9279 bitop2 = BIT_IOR_EXPR;
9281 else if (code == LT_EXPR)
9283 bitop1 = BIT_NOT_EXPR;
9284 bitop2 = BIT_AND_EXPR;
9285 swap_p = true;
9287 else if (code == LE_EXPR)
9289 bitop1 = BIT_NOT_EXPR;
9290 bitop2 = BIT_IOR_EXPR;
9291 swap_p = true;
9293 else
9295 bitop1 = BIT_XOR_EXPR;
9296 if (code == EQ_EXPR)
9297 bitop2 = BIT_NOT_EXPR;
9301 if (!vec_stmt)
9303 if (bitop1 == NOP_EXPR)
9305 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9306 return false;
9308 else
9310 machine_mode mode = TYPE_MODE (vectype);
9311 optab optab;
9313 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9314 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9315 return false;
9317 if (bitop2 != NOP_EXPR)
9319 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9320 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9321 return false;
9325 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9326 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9327 dts, ndts, slp_node, cost_vec);
9328 return true;
9331 /* Transform. */
9332 if (!slp_node)
9334 vec_oprnds0.create (1);
9335 vec_oprnds1.create (1);
9338 /* Handle def. */
9339 lhs = gimple_assign_lhs (stmt);
9340 mask = vect_create_destination_var (lhs, mask_type);
9342 /* Handle cmp expr. */
9343 for (j = 0; j < ncopies; j++)
9345 stmt_vec_info new_stmt_info = NULL;
9346 if (j == 0)
9348 if (slp_node)
9350 auto_vec<tree, 2> ops;
9351 auto_vec<vec<tree>, 2> vec_defs;
9353 ops.safe_push (rhs1);
9354 ops.safe_push (rhs2);
9355 vect_get_slp_defs (ops, slp_node, &vec_defs);
9356 vec_oprnds1 = vec_defs.pop ();
9357 vec_oprnds0 = vec_defs.pop ();
9358 if (swap_p)
9359 std::swap (vec_oprnds0, vec_oprnds1);
9361 else
9363 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
9364 vectype);
9365 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
9366 vectype);
9369 else
9371 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
9372 vec_oprnds0.pop ());
9373 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
9374 vec_oprnds1.pop ());
9377 if (!slp_node)
9379 if (swap_p)
9380 std::swap (vec_rhs1, vec_rhs2);
9381 vec_oprnds0.quick_push (vec_rhs1);
9382 vec_oprnds1.quick_push (vec_rhs2);
9385 /* Arguments are ready. Create the new vector stmt. */
9386 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9388 vec_rhs2 = vec_oprnds1[i];
9390 new_temp = make_ssa_name (mask);
9391 if (bitop1 == NOP_EXPR)
9393 gassign *new_stmt = gimple_build_assign (new_temp, code,
9394 vec_rhs1, vec_rhs2);
9395 new_stmt_info
9396 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9398 else
9400 gassign *new_stmt;
9401 if (bitop1 == BIT_NOT_EXPR)
9402 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9403 else
9404 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9405 vec_rhs2);
9406 new_stmt_info
9407 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9408 if (bitop2 != NOP_EXPR)
9410 tree res = make_ssa_name (mask);
9411 if (bitop2 == BIT_NOT_EXPR)
9412 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9413 else
9414 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9415 new_temp);
9416 new_stmt_info
9417 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9420 if (slp_node)
9421 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9424 if (slp_node)
9425 continue;
9427 if (j == 0)
9428 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9429 else
9430 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9432 prev_stmt_info = new_stmt_info;
9435 vec_oprnds0.release ();
9436 vec_oprnds1.release ();
9438 return true;
9441 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9442 can handle all live statements in the node. Otherwise return true
9443 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9444 GSI and VEC_STMT are as for vectorizable_live_operation. */
9446 static bool
9447 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9448 slp_tree slp_node, stmt_vec_info *vec_stmt,
9449 stmt_vector_for_cost *cost_vec)
9451 if (slp_node)
9453 stmt_vec_info slp_stmt_info;
9454 unsigned int i;
9455 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
9457 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9458 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
9459 vec_stmt, cost_vec))
9460 return false;
9463 else if (STMT_VINFO_LIVE_P (stmt_info)
9464 && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
9465 vec_stmt, cost_vec))
9466 return false;
9468 return true;
9471 /* Make sure the statement is vectorizable. */
9473 opt_result
9474 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
9475 slp_tree node, slp_instance node_instance,
9476 stmt_vector_for_cost *cost_vec)
9478 vec_info *vinfo = stmt_info->vinfo;
9479 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9480 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9481 bool ok;
9482 gimple_seq pattern_def_seq;
9484 if (dump_enabled_p ())
9485 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
9486 stmt_info->stmt);
9488 if (gimple_has_volatile_ops (stmt_info->stmt))
9489 return opt_result::failure_at (stmt_info->stmt,
9490 "not vectorized:"
9491 " stmt has volatile operands: %G\n",
9492 stmt_info->stmt);
9494 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9495 && node == NULL
9496 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9498 gimple_stmt_iterator si;
9500 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9502 stmt_vec_info pattern_def_stmt_info
9503 = vinfo->lookup_stmt (gsi_stmt (si));
9504 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9505 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9507 /* Analyze def stmt of STMT if it's a pattern stmt. */
9508 if (dump_enabled_p ())
9509 dump_printf_loc (MSG_NOTE, vect_location,
9510 "==> examining pattern def statement: %G",
9511 pattern_def_stmt_info->stmt);
9513 opt_result res
9514 = vect_analyze_stmt (pattern_def_stmt_info,
9515 need_to_vectorize, node, node_instance,
9516 cost_vec);
9517 if (!res)
9518 return res;
9523 /* Skip stmts that do not need to be vectorized. In loops this is expected
9524 to include:
9525 - the COND_EXPR which is the loop exit condition
9526 - any LABEL_EXPRs in the loop
9527 - computations that are used only for array indexing or loop control.
9528 In basic blocks we only analyze statements that are a part of some SLP
9529 instance; therefore, all the statements are relevant.
9531 Pattern statement needs to be analyzed instead of the original statement
9532 if the original statement is not relevant. Otherwise, we analyze both
9533 statements. In basic blocks we are called from some SLP instance
9534 traversal, so we don't analyze pattern stmts instead of the original;
9535 the pattern stmts will already be part of the SLP instance. */
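/* For instance, in a loop like
     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;
   the increment of i and the exit test i < n fall into the categories
   above: they are loop control, are not vectorized themselves, and are
   instead regenerated for the vectorized loop.  */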
9537 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9538 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9539 && !STMT_VINFO_LIVE_P (stmt_info))
9541 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9542 && pattern_stmt_info
9543 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9544 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9546 /* Analyze PATTERN_STMT instead of the original stmt. */
9547 stmt_info = pattern_stmt_info;
9548 if (dump_enabled_p ())
9549 dump_printf_loc (MSG_NOTE, vect_location,
9550 "==> examining pattern statement: %G",
9551 stmt_info->stmt);
9553 else
9555 if (dump_enabled_p ())
9556 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9558 return opt_result::success ();
9561 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9562 && node == NULL
9563 && pattern_stmt_info
9564 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9565 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9567 /* Analyze PATTERN_STMT too. */
9568 if (dump_enabled_p ())
9569 dump_printf_loc (MSG_NOTE, vect_location,
9570 "==> examining pattern statement: %G",
9571 pattern_stmt_info->stmt);
9573 opt_result res
9574 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
9575 node_instance, cost_vec);
9576 if (!res)
9577 return res;
9580 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9582 case vect_internal_def:
9583 break;
9585 case vect_reduction_def:
9586 case vect_nested_cycle:
9587 gcc_assert (!bb_vinfo
9588 && (relevance == vect_used_in_outer
9589 || relevance == vect_used_in_outer_by_reduction
9590 || relevance == vect_used_by_reduction
9591 || relevance == vect_unused_in_scope
9592 || relevance == vect_used_only_live));
9593 break;
9595 case vect_induction_def:
9596 gcc_assert (!bb_vinfo);
9597 break;
9599 case vect_constant_def:
9600 case vect_external_def:
9601 case vect_unknown_def_type:
9602 default:
9603 gcc_unreachable ();
9606 if (STMT_VINFO_RELEVANT_P (stmt_info))
9608 tree type = gimple_expr_type (stmt_info->stmt);
9609 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
9610 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9611 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9612 || (call && gimple_call_lhs (call) == NULL_TREE));
9613 *need_to_vectorize = true;
9616 if (PURE_SLP_STMT (stmt_info) && !node)
9618 if (dump_enabled_p ())
9619 dump_printf_loc (MSG_NOTE, vect_location,
9620 "handled only by SLP analysis\n");
9621 return opt_result::success ();
9624 ok = true;
9625 if (!bb_vinfo
9626 && (STMT_VINFO_RELEVANT_P (stmt_info)
9627 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9628 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
9629 -mveclibabi= takes preference over library functions with
9630 the simd attribute. */
9631 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9632 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9633 cost_vec)
9634 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
9635 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9636 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
9637 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9638 cost_vec)
9639 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9640 || vectorizable_reduction (stmt_info, NULL, NULL, node,
9641 node_instance, cost_vec)
9642 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
9643 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9644 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9645 cost_vec)
9646 || vectorizable_comparison (stmt_info, NULL, NULL, node,
9647 cost_vec));
9648 else
9650 if (bb_vinfo)
9651 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9652 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9653 cost_vec)
9654 || vectorizable_conversion (stmt_info, NULL, NULL, node,
9655 cost_vec)
9656 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9657 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9658 || vectorizable_assignment (stmt_info, NULL, NULL, node,
9659 cost_vec)
9660 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9661 cost_vec)
9662 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9663 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9664 cost_vec)
9665 || vectorizable_comparison (stmt_info, NULL, NULL, node,
9666 cost_vec));
9669 if (!ok)
9670 return opt_result::failure_at (stmt_info->stmt,
9671 "not vectorized:"
9672 " relevant stmt not supported: %G",
9673 stmt_info->stmt);
9675 /* Stmts that are (also) "live" (i.e. used outside the loop)
9676 need extra handling, except for vectorizable reductions. */
9677 if (!bb_vinfo
9678 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9679 && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
9680 return opt_result::failure_at (stmt_info->stmt,
9681 "not vectorized:"
9682 " live stmt not supported: %G",
9683 stmt_info->stmt);
9685 return opt_result::success ();
9689 /* Function vect_transform_stmt.
9691 Create a vectorized stmt to replace STMT_INFO, and insert it at BSI. */
9693 bool
9694 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9695 slp_tree slp_node, slp_instance slp_node_instance)
9697 vec_info *vinfo = stmt_info->vinfo;
9698 bool is_store = false;
9699 stmt_vec_info vec_stmt = NULL;
9700 bool done;
9702 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9703 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
9705 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9706 && nested_in_vect_loop_p
9707 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9708 stmt_info));
9710 gimple *stmt = stmt_info->stmt;
9711 switch (STMT_VINFO_TYPE (stmt_info))
9713 case type_demotion_vec_info_type:
9714 case type_promotion_vec_info_type:
9715 case type_conversion_vec_info_type:
9716 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
9717 NULL);
9718 gcc_assert (done);
9719 break;
9721 case induc_vec_info_type:
9722 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
9723 NULL);
9724 gcc_assert (done);
9725 break;
9727 case shift_vec_info_type:
9728 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9729 gcc_assert (done);
9730 break;
9732 case op_vec_info_type:
9733 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
9734 NULL);
9735 gcc_assert (done);
9736 break;
9738 case assignment_vec_info_type:
9739 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
9740 NULL);
9741 gcc_assert (done);
9742 break;
9744 case load_vec_info_type:
9745 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
9746 slp_node_instance, NULL);
9747 gcc_assert (done);
9748 break;
9750 case store_vec_info_type:
9751 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9752 gcc_assert (done);
9753 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9755 /* In case of interleaving, the whole chain is vectorized when the
9756 last store in the chain is reached. Store stmts before the last
9757 one are skipped, and their vec_stmt_info shouldn't be freed
9758 meanwhile. */
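/* E.g. for an interleaved group of four stores a[4*i], a[4*i+1],
   a[4*i+2] and a[4*i+3], the first three group members reach this point
   without the chain having been emitted; only for the last member does
   DR_GROUP_STORE_COUNT reach DR_GROUP_SIZE and IS_STORE get set.  */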
9759 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9760 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9761 is_store = true;
9763 else
9764 is_store = true;
9765 break;
9767 case condition_vec_info_type:
9768 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
9769 slp_node, NULL);
9770 gcc_assert (done);
9771 break;
9773 case comparison_vec_info_type:
9774 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
9775 slp_node, NULL);
9776 gcc_assert (done);
9777 break;
9779 case call_vec_info_type:
9780 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9781 stmt = gsi_stmt (*gsi);
9782 break;
9784 case call_simd_clone_vec_info_type:
9785 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
9786 slp_node, NULL);
9787 stmt = gsi_stmt (*gsi);
9788 break;
9790 case reduc_vec_info_type:
9791 done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
9792 slp_node_instance, NULL);
9793 gcc_assert (done);
9794 break;
9796 default:
9797 if (!STMT_VINFO_LIVE_P (stmt_info))
9799 if (dump_enabled_p ())
9800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9801 "stmt not supported.\n");
9802 gcc_unreachable ();
9806 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9807 This would break hybrid SLP vectorization. */
9808 if (slp_node)
9809 gcc_assert (!vec_stmt
9810 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
9812 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9813 is being vectorized, but outside the immediately enclosing loop. */
9814 if (vec_stmt
9815 && nested_p
9816 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9817 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9818 || STMT_VINFO_RELEVANT (stmt_info) ==
9819 vect_used_in_outer_by_reduction))
9821 struct loop *innerloop = LOOP_VINFO_LOOP (
9822 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9823 imm_use_iterator imm_iter;
9824 use_operand_p use_p;
9825 tree scalar_dest;
9827 if (dump_enabled_p ())
9828 dump_printf_loc (MSG_NOTE, vect_location,
9829 "Record the vdef for outer-loop vectorization.\n");
9831 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9832 (to be used when vectorizing outer-loop stmts that use the DEF of
9833 STMT). */
9834 if (gimple_code (stmt) == GIMPLE_PHI)
9835 scalar_dest = PHI_RESULT (stmt);
9836 else
9837 scalar_dest = gimple_get_lhs (stmt);
9839 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9840 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9842 stmt_vec_info exit_phi_info
9843 = vinfo->lookup_stmt (USE_STMT (use_p));
9844 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9848 /* Handle stmts whose DEF is used outside the loop-nest that is
9849 being vectorized. */
9850 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9852 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
9853 NULL);
9854 gcc_assert (done);
9857 if (vec_stmt)
9858 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9860 return is_store;
9864 /* Remove a group of stores (for SLP or interleaving), free their
9865 stmt_vec_info. */
9867 void
9868 vect_remove_stores (stmt_vec_info first_stmt_info)
9870 vec_info *vinfo = first_stmt_info->vinfo;
9871 stmt_vec_info next_stmt_info = first_stmt_info;
9873 while (next_stmt_info)
9875 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9876 next_stmt_info = vect_orig_stmt (next_stmt_info);
9877 /* Free the attached stmt_vec_info and remove the stmt. */
9878 vinfo->remove_stmt (next_stmt_info);
9879 next_stmt_info = tmp;
9883 /* Function get_vectype_for_scalar_type_and_size.
9885 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9886 by the target. */
9888 tree
9889 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9891 tree orig_scalar_type = scalar_type;
9892 scalar_mode inner_mode;
9893 machine_mode simd_mode;
9894 poly_uint64 nunits;
9895 tree vectype;
9897 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9898 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9899 return NULL_TREE;
9901 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9903 /* For vector types of elements whose mode precision doesn't
9904 match their type's precision we use an element type of mode
9905 precision. The vectorization routines will have to make sure
9906 they support the proper result truncation/extension.
9907 We also make sure to build vector types with INTEGER_TYPE
9908 component type only. */
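/* For instance, a C _Bool has TYPE_PRECISION 1 but QImode, and a 3-bit
   bit-field type has TYPE_PRECISION 3 with QImode as well; in both cases
   an 8-bit INTEGER_TYPE is used as the vector element type here.  */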
9909 if (INTEGRAL_TYPE_P (scalar_type)
9910 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9911 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9912 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9913 TYPE_UNSIGNED (scalar_type));
9915 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9916 When the component mode passes the above test simply use a type
9917 corresponding to that mode. The theory is that any use that
9918 would cause problems with this will disable vectorization anyway. */
9919 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9920 && !INTEGRAL_TYPE_P (scalar_type))
9921 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9923 /* We can't build a vector type of elements with alignment bigger than
9924 their size. */
9925 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9926 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9927 TYPE_UNSIGNED (scalar_type));
9929 /* If we fell back to using the mode, fail if there was
9930 no scalar type for it. */
9931 if (scalar_type == NULL_TREE)
9932 return NULL_TREE;
9934 /* If no size was supplied use the mode the target prefers. Otherwise
9935 look up a vector mode of the specified size. */
9936 if (known_eq (size, 0U))
9937 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9938 else if (!multiple_p (size, nbytes, &nunits)
9939 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9940 return NULL_TREE;
9941 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9942 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9943 return NULL_TREE;
9945 vectype = build_vector_type (scalar_type, nunits);
9947 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9948 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9949 return NULL_TREE;
9951 /* Re-attach the address-space qualifier if we canonicalized the scalar
9952 type. */
9953 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9954 return build_qualified_type
9955 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9957 return vectype;
9960 poly_uint64 current_vector_size;
9962 /* Function get_vectype_for_scalar_type.
9964 Returns the vector type corresponding to SCALAR_TYPE as supported
9965 by the target. */
9967 tree
9968 get_vectype_for_scalar_type (tree scalar_type)
9970 tree vectype;
9971 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9972 current_vector_size);
9973 if (vectype
9974 && known_eq (current_vector_size, 0U))
9975 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9976 return vectype;
9979 /* Function get_mask_type_for_scalar_type.
9981 Returns the mask type corresponding to a result of comparison
9982 of vectors of the specified SCALAR_TYPE as supported by the target.
9984 tree
9985 get_mask_type_for_scalar_type (tree scalar_type)
9987 tree vectype = get_vectype_for_scalar_type (scalar_type);
9989 if (!vectype)
9990 return NULL;
9992 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9993 current_vector_size);
9996 /* Function get_same_sized_vectype
9998 Returns a vector type corresponding to SCALAR_TYPE with the same size
9999 as VECTOR_TYPE, if supported by the target. */
10001 tree
10002 get_same_sized_vectype (tree scalar_type, tree vector_type)
10004 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10005 return build_same_sized_truth_vector_type (vector_type);
10007 return get_vectype_for_scalar_type_and_size
10008 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
10011 /* Function vect_is_simple_use.
10013 Input:
10014 VINFO - the vect info of the loop or basic block that is being vectorized.
10015 OPERAND - operand in the loop or bb.
10016 Output:
10017 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
10018 case OPERAND is an SSA_NAME that is defined in the vectorizable region
10019 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
10020 the definition could be anywhere in the function
10021 DT - the type of definition
10023 Returns whether a stmt with OPERAND can be vectorized.
10024 For loops, supportable operands are constants, loop invariants, and operands
10025 that are defined by the current iteration of the loop. Unsupportable
10026 operands are those that are defined by a previous iteration of the loop (as
10027 is the case in reduction/induction computations).
10028 For basic blocks, supportable operands are constants and bb invariants.
10029 For now, operands defined outside the basic block are not supported. */
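/* For example, for the use of b_5 in a_6 = b_5 + 1, where b_5 is defined
   by another statement inside the region being vectorized, *DT becomes
   vect_internal_def and DEF_STMT_INFO_OUT (if requested) points at the
   stmt_vec_info of that defining statement; for an operand defined before
   the loop, *DT becomes vect_external_def.  */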
10031 bool
10032 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10033 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
10035 if (def_stmt_info_out)
10036 *def_stmt_info_out = NULL;
10037 if (def_stmt_out)
10038 *def_stmt_out = NULL;
10039 *dt = vect_unknown_def_type;
10041 if (dump_enabled_p ())
10043 dump_printf_loc (MSG_NOTE, vect_location,
10044 "vect_is_simple_use: operand ");
10045 if (TREE_CODE (operand) == SSA_NAME
10046 && !SSA_NAME_IS_DEFAULT_DEF (operand))
10047 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10048 else
10049 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
10052 if (CONSTANT_CLASS_P (operand))
10053 *dt = vect_constant_def;
10054 else if (is_gimple_min_invariant (operand))
10055 *dt = vect_external_def;
10056 else if (TREE_CODE (operand) != SSA_NAME)
10057 *dt = vect_unknown_def_type;
10058 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
10059 *dt = vect_external_def;
10060 else
10062 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10063 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
10064 if (!stmt_vinfo)
10065 *dt = vect_external_def;
10066 else
10068 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
10069 def_stmt = stmt_vinfo->stmt;
10070 switch (gimple_code (def_stmt))
10072 case GIMPLE_PHI:
10073 case GIMPLE_ASSIGN:
10074 case GIMPLE_CALL:
10075 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10076 break;
10077 default:
10078 *dt = vect_unknown_def_type;
10079 break;
10081 if (def_stmt_info_out)
10082 *def_stmt_info_out = stmt_vinfo;
10084 if (def_stmt_out)
10085 *def_stmt_out = def_stmt;
10088 if (dump_enabled_p ())
10090 dump_printf (MSG_NOTE, ", type of def: ");
10091 switch (*dt)
10093 case vect_uninitialized_def:
10094 dump_printf (MSG_NOTE, "uninitialized\n");
10095 break;
10096 case vect_constant_def:
10097 dump_printf (MSG_NOTE, "constant\n");
10098 break;
10099 case vect_external_def:
10100 dump_printf (MSG_NOTE, "external\n");
10101 break;
10102 case vect_internal_def:
10103 dump_printf (MSG_NOTE, "internal\n");
10104 break;
10105 case vect_induction_def:
10106 dump_printf (MSG_NOTE, "induction\n");
10107 break;
10108 case vect_reduction_def:
10109 dump_printf (MSG_NOTE, "reduction\n");
10110 break;
10111 case vect_double_reduction_def:
10112 dump_printf (MSG_NOTE, "double reduction\n");
10113 break;
10114 case vect_nested_cycle:
10115 dump_printf (MSG_NOTE, "nested cycle\n");
10116 break;
10117 case vect_unknown_def_type:
10118 dump_printf (MSG_NOTE, "unknown\n");
10119 break;
10123 if (*dt == vect_unknown_def_type)
10125 if (dump_enabled_p ())
10126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10127 "Unsupported pattern.\n");
10128 return false;
10131 return true;
10134 /* Function vect_is_simple_use.
10136 Same as vect_is_simple_use but also determines the vector operand
10137 type of OPERAND and stores it to *VECTYPE. If the definition of
10138 OPERAND is vect_uninitialized_def, vect_constant_def or
10139 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10140 is responsible for computing the best suited vector type for the
10141 scalar operand. */
10143 bool
10144 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10145 tree *vectype, stmt_vec_info *def_stmt_info_out,
10146 gimple **def_stmt_out)
10148 stmt_vec_info def_stmt_info;
10149 gimple *def_stmt;
10150 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10151 return false;
10153 if (def_stmt_out)
10154 *def_stmt_out = def_stmt;
10155 if (def_stmt_info_out)
10156 *def_stmt_info_out = def_stmt_info;
10158 /* Now get a vector type if the def is internal, otherwise supply
10159 NULL_TREE and leave it up to the caller to figure out a proper
10160 type for the use stmt. */
10161 if (*dt == vect_internal_def
10162 || *dt == vect_induction_def
10163 || *dt == vect_reduction_def
10164 || *dt == vect_double_reduction_def
10165 || *dt == vect_nested_cycle)
10167 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
10168 gcc_assert (*vectype != NULL_TREE);
10169 if (dump_enabled_p ())
10170 dump_printf_loc (MSG_NOTE, vect_location,
10171 "vect_is_simple_use: vectype %T\n", *vectype);
10173 else if (*dt == vect_uninitialized_def
10174 || *dt == vect_constant_def
10175 || *dt == vect_external_def)
10176 *vectype = NULL_TREE;
10177 else
10178 gcc_unreachable ();
10180 return true;
10184 /* Function supportable_widening_operation
10186 Check whether an operation represented by the code CODE is a
10187 widening operation that is supported by the target platform in
10188 vector form (i.e., when operating on arguments of type VECTYPE_IN
10189 producing a result of type VECTYPE_OUT).
10191 Widening operations we currently support are NOP (CONVERT), FLOAT,
10192 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10193 are supported by the target platform either directly (via vector
10194 tree-codes), or via target builtins.
10196 Output:
10197 - CODE1 and CODE2 are codes of vector operations to be used when
10198 vectorizing the operation, if available.
10199 - MULTI_STEP_CVT determines the number of required intermediate steps in
10200 case of multi-step conversion (like char->short->int - in that case
10201 MULTI_STEP_CVT will be 1).
10202 - INTERM_TYPES contains the intermediate type required to perform the
10203 widening operation (short in the above example). */
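/* As a concrete case, on a target with 128-bit vectors a WIDEN_MULT_EXPR
   of two V8HI inputs producing V4SI results is normally expressed with
   CODE1 = VEC_WIDEN_MULT_LO_EXPR and CODE2 = VEC_WIDEN_MULT_HI_EXPR, each
   of which consumes both input vectors and produces half of the widened
   products.  */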
10205 bool
10206 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
10207 tree vectype_out, tree vectype_in,
10208 enum tree_code *code1, enum tree_code *code2,
10209 int *multi_step_cvt,
10210 vec<tree> *interm_types)
10212 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10213 struct loop *vect_loop = NULL;
10214 machine_mode vec_mode;
10215 enum insn_code icode1, icode2;
10216 optab optab1, optab2;
10217 tree vectype = vectype_in;
10218 tree wide_vectype = vectype_out;
10219 enum tree_code c1, c2;
10220 int i;
10221 tree prev_type, intermediate_type;
10222 machine_mode intermediate_mode, prev_mode;
10223 optab optab3, optab4;
10225 *multi_step_cvt = 0;
10226 if (loop_info)
10227 vect_loop = LOOP_VINFO_LOOP (loop_info);
10229 switch (code)
10231 case WIDEN_MULT_EXPR:
10232 /* The result of a vectorized widening operation usually requires
10233 two vectors (because the widened results do not fit into one vector).
10234 The generated vector results would normally be expected to be
10235 generated in the same order as in the original scalar computation,
10236 i.e. if 8 results are generated in each vector iteration, they are
10237 to be organized as follows:
10238 vect1: [res1,res2,res3,res4],
10239 vect2: [res5,res6,res7,res8].
10241 However, in the special case that the result of the widening
10242 operation is used in a reduction computation only, the order doesn't
10243 matter (because when vectorizing a reduction we change the order of
10244 the computation). Some targets can take advantage of this and
10245 generate more efficient code. For example, targets like Altivec,
10246 that support widen_mult using a sequence of {mult_even,mult_odd}
10247 generate the following vectors:
10248 vect1: [res1,res3,res5,res7],
10249 vect2: [res2,res4,res6,res8].
10251 When vectorizing outer-loops, we execute the inner-loop sequentially
10252 (each vectorized inner-loop iteration contributes to VF outer-loop
10253 iterations in parallel). We therefore don't allow changing the
10254 order of the computation in the inner-loop during outer-loop
10255 vectorization. */
10256 /* TODO: Another case in which order doesn't *really* matter is when we
10257 widen and then contract again, e.g. (short)((int)x * y >> 8).
10258 Normally, pack_trunc performs an even/odd permute, whereas the
10259 repack from an even/odd expansion would be an interleave, which
10260 would be significantly simpler for e.g. AVX2. */
10261 /* In any case, in order to avoid duplicating the code below, recurse
10262 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10263 are properly set up for the caller. If we fail, we'll continue with
10264 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10265 if (vect_loop
10266 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10267 && !nested_in_vect_loop_p (vect_loop, stmt_info)
10268 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10269 stmt_info, vectype_out,
10270 vectype_in, code1, code2,
10271 multi_step_cvt, interm_types))
10273 /* Elements in a vector with vect_used_by_reduction property cannot
10274 be reordered if the use chain with this property does not have the
10275 same operation. One such example is s += a * b, where elements
10276 in a and b cannot be reordered. Here we check if the vector defined
10277 by STMT is only directly used in the reduction statement. */
10278 tree lhs = gimple_assign_lhs (stmt_info->stmt);
10279 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10280 if (use_stmt_info
10281 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10282 return true;
10284 c1 = VEC_WIDEN_MULT_LO_EXPR;
10285 c2 = VEC_WIDEN_MULT_HI_EXPR;
10286 break;
10288 case DOT_PROD_EXPR:
10289 c1 = DOT_PROD_EXPR;
10290 c2 = DOT_PROD_EXPR;
10291 break;
10293 case SAD_EXPR:
10294 c1 = SAD_EXPR;
10295 c2 = SAD_EXPR;
10296 break;
10298 case VEC_WIDEN_MULT_EVEN_EXPR:
10299 /* Support the recursion induced just above. */
10300 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10301 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10302 break;
10304 case WIDEN_LSHIFT_EXPR:
10305 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10306 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10307 break;
10309 CASE_CONVERT:
10310 c1 = VEC_UNPACK_LO_EXPR;
10311 c2 = VEC_UNPACK_HI_EXPR;
10312 break;
10314 case FLOAT_EXPR:
10315 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10316 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10317 break;
10319 case FIX_TRUNC_EXPR:
10320 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10321 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10322 break;
10324 default:
10325 gcc_unreachable ();
10328 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10329 std::swap (c1, c2);
10331 if (code == FIX_TRUNC_EXPR)
10333 /* The signedness is determined from the output operand. */
10334 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10335 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10337 else if (CONVERT_EXPR_CODE_P (code)
10338 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
10339 && VECTOR_BOOLEAN_TYPE_P (vectype)
10340 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
10341 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
10343 /* If the input and result modes are the same, a different optab
10344 is needed where we pass in the number of units in vectype. */
10345 optab1 = vec_unpacks_sbool_lo_optab;
10346 optab2 = vec_unpacks_sbool_hi_optab;
10348 else
10350 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10351 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10354 if (!optab1 || !optab2)
10355 return false;
10357 vec_mode = TYPE_MODE (vectype);
10358 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10359 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10360 return false;
10362 *code1 = c1;
10363 *code2 = c2;
10365 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10366 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10368 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10369 return true;
10370 /* For scalar masks we may have different boolean
10371 vector types having the same QImode. Thus we
10372 add an additional check on the number of elements. */
10373 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10374 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
10375 return true;
10378 /* Check if it's a multi-step conversion that can be done using intermediate
10379 types. */
10381 prev_type = vectype;
10382 prev_mode = vec_mode;
10384 if (!CONVERT_EXPR_CODE_P (code))
10385 return false;
10387 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10388 intermediate steps in the promotion sequence. We try
10389 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10390 not. */
10391 interm_types->create (MAX_INTERM_CVT_STEPS);
10392 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10394 intermediate_mode = insn_data[icode1].operand[0].mode;
10395 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10397 intermediate_type = vect_halve_mask_nunits (prev_type);
10398 if (intermediate_mode != TYPE_MODE (intermediate_type))
10399 return false;
10401 else
10402 intermediate_type
10403 = lang_hooks.types.type_for_mode (intermediate_mode,
10404 TYPE_UNSIGNED (prev_type));
10406 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
10407 && VECTOR_BOOLEAN_TYPE_P (prev_type)
10408 && intermediate_mode == prev_mode
10409 && SCALAR_INT_MODE_P (prev_mode))
10411 /* If the input and result modes are the same, a different optab
10412 is needed where we pass in the number of units in vectype. */
10413 optab3 = vec_unpacks_sbool_lo_optab;
10414 optab4 = vec_unpacks_sbool_hi_optab;
10416 else
10418 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10419 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10422 if (!optab3 || !optab4
10423 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10424 || insn_data[icode1].operand[0].mode != intermediate_mode
10425 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10426 || insn_data[icode2].operand[0].mode != intermediate_mode
10427 || ((icode1 = optab_handler (optab3, intermediate_mode))
10428 == CODE_FOR_nothing)
10429 || ((icode2 = optab_handler (optab4, intermediate_mode))
10430 == CODE_FOR_nothing))
10431 break;
10433 interm_types->quick_push (intermediate_type);
10434 (*multi_step_cvt)++;
10436 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10437 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10439 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10440 return true;
10441 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10442 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
10443 return true;
10446 prev_type = intermediate_type;
10447 prev_mode = intermediate_mode;
10450 interm_types->release ();
10451 return false;
10455 /* Function supportable_narrowing_operation
10457 Check whether an operation represented by the code CODE is a
10458 narrowing operation that is supported by the target platform in
10459 vector form (i.e., when operating on arguments of type VECTYPE_IN
10460 and producing a result of type VECTYPE_OUT).
10462 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10463 and FLOAT. This function checks if these operations are supported by
10464 the target platform directly via vector tree-codes.
10466 Output:
10467 - CODE1 is the code of a vector operation to be used when
10468 vectorizing the operation, if available.
10469 - MULTI_STEP_CVT determines the number of required intermediate steps in
10470 case of multi-step conversion (like int->short->char - in that case
10471 MULTI_STEP_CVT will be 1).
10472 - INTERM_TYPES contains the intermediate type required to perform the
10473 narrowing operation (short in the above example). */
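/* As a concrete case, on a target with 128-bit vectors a conversion of
   V4SI values to V8HI is expressed with CODE1 = VEC_PACK_TRUNC_EXPR,
   which consumes two V4SI input vectors and produces one V8HI result.  */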
10475 bool
10476 supportable_narrowing_operation (enum tree_code code,
10477 tree vectype_out, tree vectype_in,
10478 enum tree_code *code1, int *multi_step_cvt,
10479 vec<tree> *interm_types)
10481 machine_mode vec_mode;
10482 enum insn_code icode1;
10483 optab optab1, interm_optab;
10484 tree vectype = vectype_in;
10485 tree narrow_vectype = vectype_out;
10486 enum tree_code c1;
10487 tree intermediate_type, prev_type;
10488 machine_mode intermediate_mode, prev_mode;
10489 int i;
10490 bool uns;
10492 *multi_step_cvt = 0;
10493 switch (code)
10495 CASE_CONVERT:
10496 c1 = VEC_PACK_TRUNC_EXPR;
10497 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
10498 && VECTOR_BOOLEAN_TYPE_P (vectype)
10499 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
10500 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
10501 optab1 = vec_pack_sbool_trunc_optab;
10502 else
10503 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10504 break;
10506 case FIX_TRUNC_EXPR:
10507 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10508 /* The signedness is determined from the output operand. */
10509 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10510 break;
10512 case FLOAT_EXPR:
10513 c1 = VEC_PACK_FLOAT_EXPR;
10514 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10515 break;
10517 default:
10518 gcc_unreachable ();
10521 if (!optab1)
10522 return false;
10524 vec_mode = TYPE_MODE (vectype);
10525 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10526 return false;
10528 *code1 = c1;
10530 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10532 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10533 return true;
10534 /* For scalar masks we may have different boolean
10535 vector types having the same QImode. Thus we
10536 add an additional check on the number of elements. */
10537 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10538 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
10539 return true;
10542 if (code == FLOAT_EXPR)
10543 return false;
10545 /* Check if it's a multi-step conversion that can be done using intermediate
10546 types. */
10547 prev_mode = vec_mode;
10548 prev_type = vectype;
10549 if (code == FIX_TRUNC_EXPR)
10550 uns = TYPE_UNSIGNED (vectype_out);
10551 else
10552 uns = TYPE_UNSIGNED (vectype);
10554 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10555 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10556 costly than signed. */
10557 if (code == FIX_TRUNC_EXPR && uns)
10559 enum insn_code icode2;
10561 intermediate_type
10562 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10563 interm_optab
10564 = optab_for_tree_code (c1, intermediate_type, optab_default);
10565 if (interm_optab != unknown_optab
10566 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10567 && insn_data[icode1].operand[0].mode
10568 == insn_data[icode2].operand[0].mode)
10570 uns = false;
10571 optab1 = interm_optab;
10572 icode1 = icode2;
10576 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10577 intermediate steps in the narrowing sequence. We try
10578 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10579 interm_types->create (MAX_INTERM_CVT_STEPS);
10580 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10582 intermediate_mode = insn_data[icode1].operand[0].mode;
10583 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10585 intermediate_type = vect_double_mask_nunits (prev_type);
10586 if (intermediate_mode != TYPE_MODE (intermediate_type))
10587 return false;
10589 else
10590 intermediate_type
10591 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10592 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
10593 && VECTOR_BOOLEAN_TYPE_P (prev_type)
10594 && intermediate_mode == prev_mode
10595 && SCALAR_INT_MODE_P (prev_mode))
10596 interm_optab = vec_pack_sbool_trunc_optab;
10597 else
10598 interm_optab
10599 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10600 optab_default);
10601 if (!interm_optab
10602 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10603 || insn_data[icode1].operand[0].mode != intermediate_mode
10604 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10605 == CODE_FOR_nothing))
10606 break;
10608 interm_types->quick_push (intermediate_type);
10609 (*multi_step_cvt)++;
10611 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10613 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10614 return true;
10615 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10616 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
10617 return true;
10620 prev_mode = intermediate_mode;
10621 prev_type = intermediate_type;
10622 optab1 = interm_optab;
10625 interm_types->release ();
10626 return false;
10629 /* Generate and return a statement that sets vector mask MASK such that
10630 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
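/* For example, with an 8-element mask, START_INDEX 6 and END_INDEX 9,
   the generated WHILE_ULT call yields the mask {1, 1, 1, 0, 0, 0, 0, 0}:
   only elements 0, 1 and 2 satisfy START_INDEX + I < END_INDEX.  */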
10632 gcall *
10633 vect_gen_while (tree mask, tree start_index, tree end_index)
10635 tree cmp_type = TREE_TYPE (start_index);
10636 tree mask_type = TREE_TYPE (mask);
10637 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10638 cmp_type, mask_type,
10639 OPTIMIZE_FOR_SPEED));
10640 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10641 start_index, end_index,
10642 build_zero_cst (mask_type));
10643 gimple_call_set_lhs (call, mask);
10644 return call;
10647 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10648 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10650 tree
10651 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10652 tree end_index)
10654 tree tmp = make_ssa_name (mask_type);
10655 gcall *call = vect_gen_while (tmp, start_index, end_index);
10656 gimple_seq_add_stmt (seq, call);
10657 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10660 /* Try to compute the vector types required to vectorize STMT_INFO,
10661 returning true on success and false if vectorization isn't possible.
10663 On success:
10665 - Set *STMT_VECTYPE_OUT to:
10666 - NULL_TREE if the statement doesn't need to be vectorized;
10667 - boolean_type_node if the statement is a boolean operation whose
10668 vector type can only be determined once all the other vector types
10669 are known; and
10670 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10672 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10673 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10674 statement does not help to determine the overall number of units. */
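/* For example, for a comparison tem_1 = a_2 < b_3 on 32-bit integers,
   *STMT_VECTYPE_OUT is boolean_type_node (the mask vector type is
   determined later, cf. vect_get_mask_type_for_stmt below) while
   *NUNITS_VECTYPE_OUT is the vector type for the compared int type, which
   is what determines the number of units.  */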
10676 opt_result
10677 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10678 tree *stmt_vectype_out,
10679 tree *nunits_vectype_out)
10681 gimple *stmt = stmt_info->stmt;
10683 *stmt_vectype_out = NULL_TREE;
10684 *nunits_vectype_out = NULL_TREE;
10686 if (gimple_get_lhs (stmt) == NULL_TREE
10687 /* MASK_STORE has no lhs, but is ok. */
10688 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10690 if (is_a <gcall *> (stmt))
10692 /* Ignore calls with no lhs. These must be calls to
10693 #pragma omp simd functions, and the vectorization factor
10694 they really need can't be determined until
10695 vectorizable_simd_clone_call. */
10696 if (dump_enabled_p ())
10697 dump_printf_loc (MSG_NOTE, vect_location,
10698 "defer to SIMD clone analysis.\n");
10699 return opt_result::success ();
10702 return opt_result::failure_at (stmt,
10703 "not vectorized: irregular stmt.%G", stmt);
10706 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10707 return opt_result::failure_at (stmt,
10708 "not vectorized: vector stmt in loop:%G",
10709 stmt);
10711 tree vectype;
10712 tree scalar_type = NULL_TREE;
10713 if (STMT_VINFO_VECTYPE (stmt_info))
10714 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10715 else
10717 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10718 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10719 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10720 else
10721 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10723 /* Pure bool ops don't participate in number-of-units computation.
10724 For comparisons use the types being compared. */
10725 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10726 && is_gimple_assign (stmt)
10727 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10729 *stmt_vectype_out = boolean_type_node;
10731 tree rhs1 = gimple_assign_rhs1 (stmt);
10732 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10733 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10734 scalar_type = TREE_TYPE (rhs1);
10735 else
10737 if (dump_enabled_p ())
10738 dump_printf_loc (MSG_NOTE, vect_location,
10739 "pure bool operation.\n");
10740 return opt_result::success ();
10744 if (dump_enabled_p ())
10745 dump_printf_loc (MSG_NOTE, vect_location,
10746 "get vectype for scalar type: %T\n", scalar_type);
10747 vectype = get_vectype_for_scalar_type (scalar_type);
10748 if (!vectype)
10749 return opt_result::failure_at (stmt,
10750 "not vectorized:"
10751 " unsupported data-type %T\n",
10752 scalar_type);
10754 if (!*stmt_vectype_out)
10755 *stmt_vectype_out = vectype;
10757 if (dump_enabled_p ())
10758 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
10761 /* Don't try to compute scalar types if the stmt produces a boolean
10762 vector; use the existing vector type instead. */
10763 tree nunits_vectype;
10764 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10765 nunits_vectype = vectype;
10766 else
10768 /* The number of units is set according to the smallest scalar
10769 type (or the largest vector size, but we only support one
10770 vector size per vectorization). */
10771 if (*stmt_vectype_out != boolean_type_node)
10773 HOST_WIDE_INT dummy;
10774 scalar_type = vect_get_smallest_scalar_type (stmt_info,
10775 &dummy, &dummy);
10777 if (dump_enabled_p ())
10778 dump_printf_loc (MSG_NOTE, vect_location,
10779 "get vectype for scalar type: %T\n", scalar_type);
10780 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10782 if (!nunits_vectype)
10783 return opt_result::failure_at (stmt,
10784 "not vectorized: unsupported data-type %T\n",
10785 scalar_type);
10787 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10788 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10789 return opt_result::failure_at (stmt,
10790 "not vectorized: different sized vector "
10791 "types in statement, %T and %T\n",
10792 vectype, nunits_vectype);
10794 if (dump_enabled_p ())
10796 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
10797 nunits_vectype);
10799 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10800 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10801 dump_printf (MSG_NOTE, "\n");
10804 *nunits_vectype_out = nunits_vectype;
10805 return opt_result::success ();
10808 /* Try to determine the correct vector type for STMT_INFO, which is a
10809 statement that produces a scalar boolean result. Return the vector
10810 type on success, otherwise return NULL_TREE. */
10812 opt_tree
10813 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10815 gimple *stmt = stmt_info->stmt;
10816 tree mask_type = NULL;
10817 tree vectype, scalar_type;
10819 if (is_gimple_assign (stmt)
10820 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10821 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10823 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10824 mask_type = get_mask_type_for_scalar_type (scalar_type);
10826 if (!mask_type)
10827 return opt_tree::failure_at (stmt,
10828 "not vectorized: unsupported mask\n");
10830 else
10832 tree rhs;
10833 ssa_op_iter iter;
10834 enum vect_def_type dt;
10836 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10838 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10839 return opt_tree::failure_at (stmt,
10840 "not vectorized:can't compute mask"
10841 " type for statement, %G", stmt);
10843 /* No vectype probably means external definition.
10844 Allow it in case there is another operand from which
10845 the mask type can be determined. */
10846 if (!vectype)
10847 continue;
10849 if (!mask_type)
10850 mask_type = vectype;
10851 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10852 TYPE_VECTOR_SUBPARTS (vectype)))
10853 return opt_tree::failure_at (stmt,
10854 "not vectorized: different sized mask"
10855 " types in statement, %T and %T\n",
10856 mask_type, vectype);
10857 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10858 != VECTOR_BOOLEAN_TYPE_P (vectype))
10859 return opt_tree::failure_at (stmt,
10860 "not vectorized: mixed mask and "
10861 "nonmask vector types in statement, "
10862 "%T and %T\n",
10863 mask_type, vectype);
10866 /* We may compare a boolean value loaded as a vector of integers.
10867 Fix mask_type in that case. */
10868 if (mask_type
10869 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10870 && gimple_code (stmt) == GIMPLE_ASSIGN
10871 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10872 mask_type = build_same_sized_truth_vector_type (mask_type);
10875 /* No mask_type should mean a loop-invariant predicate.
10876 This is probably a subject for optimization in if-conversion. */
10877 if (!mask_type)
10878 return opt_tree::failure_at (stmt,
10879 "not vectorized: can't compute mask type "
10880 "for statement: %G", stmt);
10882 return opt_tree::success (mask_type);