gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
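/* Illustrative use with hypothetical values: recording two copies of an
   unaligned load with misalignment 3 pushes one stmt_info_for_cost entry
   and returns the preliminary estimate
   2 * builtin_vectorization_cost (unaligned_load, vectype, 3):

     unsigned cost = record_stmt_cost (cost_vec, 2, unaligned_load,
                                       stmt_info, vectype, 3, vect_body);  */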
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
146 return vect_name;
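/* For illustration (SSA names are hypothetical): with N == 2 the statement
   emitted above has the form

     vect_x_3 = vect_array[2];

   and vect_x_3 is the SSA name returned to the caller.  */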
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
158 tree array_ref;
159 gimple *new_stmt;
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 tree mem_ref;
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
249 worklist->safe_push (stmt_info);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
261 tree op;
262 ssa_op_iter iter;
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
347 *live_p = true;
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
361 return (*live_p || *relevant);
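/* Illustration: a store such as a[i] = x has a vdef and is therefore
   marked vect_used_in_scope above, while an SSA def whose only external
   uses are in the loop-closed exit PHIs (e.g. the final value of a sum
   read after the loop) only sets *LIVE_P (and possibly
   vect_used_only_live).  */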
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
373 tree operand;
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
381 /* STMT has a data_ref. FORNOW this means that it's one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
413 return false;
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
422 if (operand == use)
423 return true;
425 return false;
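/* For instance, in a call to the internal function .MASK_STORE the mask
   and the stored-value arguments are non-indexing uses (handled by the
   internal-call case above), whereas the pointer argument is only used
   for addressing.  */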
429 /*
430 Function process_use.
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
474 if (!dstmt_vinfo)
475 return opt_result::success ();
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
654 use_operand_p use_p;
655 ssa_op_iter iter;
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
706 default:
707 break;
710 if (is_pattern_stmt_p (stmt_vinfo))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
734 for (; i < gimple_num_ops (assign); i++)
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
749 for (i = 0; i < gimple_call_num_args (call); i++)
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
781 if (fatal)
782 *fatal = false;
783 return res;
786 } /* while worklist */
788 return opt_result::success ();
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
806 int inside_cost = 0, prologue_cost = 0;
808 gcc_assert (cost_vec != NULL);
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. If WIDEN_ARITH
840 is true the stmt is doing widening arithmetic. */
842 static void
843 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
844 enum vect_def_type *dt,
845 unsigned int ncopies, int pwr,
846 stmt_vector_for_cost *cost_vec,
847 bool widen_arith)
849 int i;
850 int inside_cost = 0, prologue_cost = 0;
852 for (i = 0; i < pwr + 1; i++)
854 inside_cost += record_stmt_cost (cost_vec, ncopies,
855 widen_arith
856 ? vector_stmt : vec_promote_demote,
857 stmt_info, 0, vect_body);
858 ncopies *= 2;
861 /* FORNOW: Assuming a maximum of 2 args per stmt. */
862 for (i = 0; i < 2; i++)
863 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
864 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
865 stmt_info, 0, vect_prologue);
867 if (dump_enabled_p ())
868 dump_printf_loc (MSG_NOTE, vect_location,
869 "vect_model_promotion_demotion_cost: inside_cost = %d, "
870 "prologue_cost = %d .\n", inside_cost, prologue_cost);
873 /* Returns true if the current function returns DECL. */
875 static bool
876 cfun_returns (tree decl)
878 edge_iterator ei;
879 edge e;
880 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
882 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
883 if (!ret)
884 continue;
885 if (gimple_return_retval (ret) == decl)
886 return true;
887 /* We often end up with an aggregate copy to the result decl,
888 handle that case as well. First skip intermediate clobbers
889 though. */
890 gimple *def = ret;
891 do
892 {
893 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
894 }
895 while (gimple_clobber_p (def));
896 if (is_a <gassign *> (def)
897 && gimple_assign_lhs (def) == gimple_return_retval (ret)
898 && gimple_assign_rhs1 (def) == decl)
899 return true;
901 return false;
904 /* Function vect_model_store_cost
906 Models cost for stores. In the case of grouped accesses, one access
907 has the overhead of the grouped access attributed to it. */
909 static void
910 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
911 vect_memory_access_type memory_access_type,
912 vec_load_store_type vls_type, slp_tree slp_node,
913 stmt_vector_for_cost *cost_vec)
915 unsigned int inside_cost = 0, prologue_cost = 0;
916 stmt_vec_info first_stmt_info = stmt_info;
917 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
919 /* ??? Somehow we need to fix this at the callers. */
920 if (slp_node)
921 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
923 if (vls_type == VLS_STORE_INVARIANT)
925 if (!slp_node)
926 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
927 stmt_info, 0, vect_prologue);
930 /* Grouped stores update all elements in the group at once,
931 so we want the DR for the first statement. */
932 if (!slp_node && grouped_access_p)
933 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
935 /* True if we should include any once-per-group costs as well as
936 the cost of the statement itself. For SLP we only get called
937 once per group anyhow. */
938 bool first_stmt_p = (first_stmt_info == stmt_info);
940 /* We assume that the cost of a single store-lanes instruction is
941 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
942 access is instead being provided by a permute-and-store operation,
943 include the cost of the permutes. */
944 if (first_stmt_p
945 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
947 /* Uses high and low interleave or shuffle operations for each
948 needed permute. */
949 int group_size = DR_GROUP_SIZE (first_stmt_info);
950 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
951 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
952 stmt_info, 0, vect_body);
954 if (dump_enabled_p ())
955 dump_printf_loc (MSG_NOTE, vect_location,
956 "vect_model_store_cost: strided group_size = %d .\n",
957 group_size);
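/* For example, with hypothetical sizes GROUP_SIZE == 4 and NCOPIES == 2,
   the formula above charges 2 * ceil_log2 (4) * 4 == 16 vec_perm
   operations for the permute-and-store sequence.  */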
960 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
961 /* Costs of the stores. */
962 if (memory_access_type == VMAT_ELEMENTWISE
963 || memory_access_type == VMAT_GATHER_SCATTER)
965 /* N scalar stores plus extracting the elements. */
966 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
967 inside_cost += record_stmt_cost (cost_vec,
968 ncopies * assumed_nunits,
969 scalar_store, stmt_info, 0, vect_body);
971 else
972 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
974 if (memory_access_type == VMAT_ELEMENTWISE
975 || memory_access_type == VMAT_STRIDED_SLP)
977 /* N scalar stores plus extracting the elements. */
978 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
979 inside_cost += record_stmt_cost (cost_vec,
980 ncopies * assumed_nunits,
981 vec_to_scalar, stmt_info, 0, vect_body);
984 /* When vectorizing a store into the function result assign
985 a penalty if the function returns in a multi-register location.
986 In this case we assume we'll end up with having to spill the
987 vector result and do piecewise loads as a conservative estimate. */
988 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
989 if (base
990 && (TREE_CODE (base) == RESULT_DECL
991 || (DECL_P (base) && cfun_returns (base)))
992 && !aggregate_value_p (base, cfun->decl))
994 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
995 /* ??? Handle PARALLEL in some way. */
996 if (REG_P (reg))
998 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
999 /* Assume that a single reg-reg move is possible and cheap,
1000 do not account for vector to gp register move cost. */
1001 if (nregs > 1)
1003 /* Spill. */
1004 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1005 vector_store,
1006 stmt_info, 0, vect_epilogue);
1007 /* Loads. */
1008 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1009 scalar_load,
1010 stmt_info, 0, vect_epilogue);
1015 if (dump_enabled_p ())
1016 dump_printf_loc (MSG_NOTE, vect_location,
1017 "vect_model_store_cost: inside_cost = %d, "
1018 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1022 /* Calculate cost of DR's memory access. */
1023 void
1024 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1025 unsigned int *inside_cost,
1026 stmt_vector_for_cost *body_cost_vec)
1028 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1029 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1030 int alignment_support_scheme
1031 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, false);
1033 switch (alignment_support_scheme)
1035 case dr_aligned:
1037 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1038 vector_store, stmt_info, 0,
1039 vect_body);
1041 if (dump_enabled_p ())
1042 dump_printf_loc (MSG_NOTE, vect_location,
1043 "vect_model_store_cost: aligned.\n");
1044 break;
1047 case dr_unaligned_supported:
1049 /* Here, we assign an additional cost for the unaligned store. */
1050 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1051 unaligned_store, stmt_info,
1052 dr_misalignment (dr_info, vectype),
1053 vect_body);
1054 if (dump_enabled_p ())
1055 dump_printf_loc (MSG_NOTE, vect_location,
1056 "vect_model_store_cost: unaligned supported by "
1057 "hardware.\n");
1058 break;
1061 case dr_unaligned_unsupported:
1063 *inside_cost = VECT_MAX_COST;
1065 if (dump_enabled_p ())
1066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1067 "vect_model_store_cost: unsupported access.\n");
1068 break;
1071 default:
1072 gcc_unreachable ();
1077 /* Function vect_model_load_cost
1079 Models cost for loads. In the case of grouped accesses, one access has
1080 the overhead of the grouped access attributed to it. Since unaligned
1081 accesses are supported for loads, we also account for the costs of the
1082 access scheme chosen. */
1084 static void
1085 vect_model_load_cost (vec_info *vinfo,
1086 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1087 vect_memory_access_type memory_access_type,
1088 gather_scatter_info *gs_info,
1089 slp_tree slp_node,
1090 stmt_vector_for_cost *cost_vec)
1092 unsigned int inside_cost = 0, prologue_cost = 0;
1093 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1095 gcc_assert (cost_vec);
1097 /* ??? Somehow we need to fix this at the callers. */
1098 if (slp_node)
1099 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1101 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1103 /* If the load is permuted then the alignment is determined by
1104 the first group element, not by the first scalar stmt DR. */
1105 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1106 /* Record the cost for the permutation. */
1107 unsigned n_perms, n_loads;
1108 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1109 vf, true, &n_perms, &n_loads);
1110 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1111 first_stmt_info, 0, vect_body);
1113 /* And adjust the number of loads performed. This handles
1114 redundancies as well as loads that are later dead. */
1115 ncopies = n_loads;
1118 /* Grouped loads read all elements in the group at once,
1119 so we want the DR for the first statement. */
1120 stmt_vec_info first_stmt_info = stmt_info;
1121 if (!slp_node && grouped_access_p)
1122 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1124 /* True if we should include any once-per-group costs as well as
1125 the cost of the statement itself. For SLP we only get called
1126 once per group anyhow. */
1127 bool first_stmt_p = (first_stmt_info == stmt_info);
1129 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1130 ones we actually need. Account for the cost of unused results. */
1131 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1133 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1134 stmt_vec_info next_stmt_info = first_stmt_info;
1135 do
1136 {
1137 gaps -= 1;
1138 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1139 }
1140 while (next_stmt_info);
1141 if (gaps)
1143 if (dump_enabled_p ())
1144 dump_printf_loc (MSG_NOTE, vect_location,
1145 "vect_model_load_cost: %d unused vectors.\n",
1146 gaps);
1147 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps, false,
1148 &inside_cost, &prologue_cost,
1149 cost_vec, cost_vec, true);
1153 /* We assume that the cost of a single load-lanes instruction is
1154 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1155 access is instead being provided by a load-and-permute operation,
1156 include the cost of the permutes. */
1157 if (first_stmt_p
1158 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1160 /* Uses even and odd extract operations or shuffle operations
1161 for each needed permute. */
1162 int group_size = DR_GROUP_SIZE (first_stmt_info);
1163 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1164 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1165 stmt_info, 0, vect_body);
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE, vect_location,
1169 "vect_model_load_cost: strided group_size = %d .\n",
1170 group_size);
1173 /* The loads themselves. */
1174 if (memory_access_type == VMAT_ELEMENTWISE
1175 || memory_access_type == VMAT_GATHER_SCATTER)
1177 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1178 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1179 if (memory_access_type == VMAT_GATHER_SCATTER
1180 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1181 /* For emulated gathers N offset vector element extracts
1182 (we assume the scalar scaling and ptr + offset add is consumed by
1183 the load). */
1184 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1185 vec_to_scalar, stmt_info, 0,
1186 vect_body);
1187 /* N scalar loads plus gathering them into a vector. */
1188 inside_cost += record_stmt_cost (cost_vec,
1189 ncopies * assumed_nunits,
1190 scalar_load, stmt_info, 0, vect_body);
1192 else
1193 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1194 &inside_cost, &prologue_cost,
1195 cost_vec, cost_vec, true);
1196 if (memory_access_type == VMAT_ELEMENTWISE
1197 || memory_access_type == VMAT_STRIDED_SLP
1198 || (memory_access_type == VMAT_GATHER_SCATTER
1199 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1200 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1201 stmt_info, 0, vect_body);
1203 if (dump_enabled_p ())
1204 dump_printf_loc (MSG_NOTE, vect_location,
1205 "vect_model_load_cost: inside_cost = %d, "
1206 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1210 /* Calculate cost of DR's memory access. */
1211 void
1212 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1213 bool add_realign_cost, unsigned int *inside_cost,
1214 unsigned int *prologue_cost,
1215 stmt_vector_for_cost *prologue_cost_vec,
1216 stmt_vector_for_cost *body_cost_vec,
1217 bool record_prologue_costs)
1219 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1220 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1221 int alignment_support_scheme
1222 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, false);
1224 switch (alignment_support_scheme)
1226 case dr_aligned:
1228 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1229 stmt_info, 0, vect_body);
1231 if (dump_enabled_p ())
1232 dump_printf_loc (MSG_NOTE, vect_location,
1233 "vect_model_load_cost: aligned.\n");
1235 break;
1237 case dr_unaligned_supported:
1239 /* Here, we assign an additional cost for the unaligned load. */
1240 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1241 unaligned_load, stmt_info,
1242 dr_misalignment (dr_info, vectype),
1243 vect_body);
1245 if (dump_enabled_p ())
1246 dump_printf_loc (MSG_NOTE, vect_location,
1247 "vect_model_load_cost: unaligned supported by "
1248 "hardware.\n");
1250 break;
1252 case dr_explicit_realign:
1254 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1255 vector_load, stmt_info, 0, vect_body);
1256 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1257 vec_perm, stmt_info, 0, vect_body);
1259 /* FIXME: If the misalignment remains fixed across the iterations of
1260 the containing loop, the following cost should be added to the
1261 prologue costs. */
1262 if (targetm.vectorize.builtin_mask_for_load)
1263 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1264 stmt_info, 0, vect_body);
1266 if (dump_enabled_p ())
1267 dump_printf_loc (MSG_NOTE, vect_location,
1268 "vect_model_load_cost: explicit realign\n");
1270 break;
1272 case dr_explicit_realign_optimized:
1274 if (dump_enabled_p ())
1275 dump_printf_loc (MSG_NOTE, vect_location,
1276 "vect_model_load_cost: unaligned software "
1277 "pipelined.\n");
1279 /* Unaligned software pipeline has a load of an address, an initial
1280 load, and possibly a mask operation to "prime" the loop. However,
1281 if this is an access in a group of loads, which provide grouped
1282 access, then the above cost should only be considered for one
1283 access in the group. Inside the loop, there is a load op
1284 and a realignment op. */
1286 if (add_realign_cost && record_prologue_costs)
1288 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1289 vector_stmt, stmt_info,
1290 0, vect_prologue);
1291 if (targetm.vectorize.builtin_mask_for_load)
1292 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1293 vector_stmt, stmt_info,
1294 0, vect_prologue);
1297 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1298 stmt_info, 0, vect_body);
1299 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1300 stmt_info, 0, vect_body);
1302 if (dump_enabled_p ())
1303 dump_printf_loc (MSG_NOTE, vect_location,
1304 "vect_model_load_cost: explicit realign optimized"
1305 "\n");
1307 break;
1310 case dr_unaligned_unsupported:
1312 *inside_cost = VECT_MAX_COST;
1314 if (dump_enabled_p ())
1315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1316 "vect_model_load_cost: unsupported access.\n");
1317 break;
1320 default:
1321 gcc_unreachable ();
1325 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1326 the loop preheader for the vectorized stmt STMT_VINFO. */
1328 static void
1329 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1330 gimple_stmt_iterator *gsi)
1332 if (gsi)
1333 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1334 else
1335 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1337 if (dump_enabled_p ())
1338 dump_printf_loc (MSG_NOTE, vect_location,
1339 "created new init_stmt: %G", new_stmt);
1342 /* Function vect_init_vector.
1344 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1345 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1346 vector type a vector with all elements equal to VAL is created first.
1347 Place the initialization at GSI if it is not NULL. Otherwise, place the
1348 initialization at the loop preheader.
1349 Return the DEF of INIT_STMT.
1350 It will be used in the vectorization of STMT_INFO. */
1352 tree
1353 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1354 gimple_stmt_iterator *gsi)
1356 gimple *init_stmt;
1357 tree new_temp;
1359 /* We abuse this function to push something to an SSA name with initial 'val'. */
1360 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1362 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1363 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1365 /* Scalar boolean value should be transformed into
1366 all zeros or all ones value before building a vector. */
1367 if (VECTOR_BOOLEAN_TYPE_P (type))
1369 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1370 tree false_val = build_zero_cst (TREE_TYPE (type));
1372 if (CONSTANT_CLASS_P (val))
1373 val = integer_zerop (val) ? false_val : true_val;
1374 else
1376 new_temp = make_ssa_name (TREE_TYPE (type));
1377 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1378 val, true_val, false_val);
1379 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1380 val = new_temp;
1383 else
1385 gimple_seq stmts = NULL;
1386 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1387 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1388 TREE_TYPE (type), val);
1389 else
1390 /* ??? Condition vectorization expects us to do
1391 promotion of invariant/external defs. */
1392 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1393 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1394 !gsi_end_p (gsi2); )
1396 init_stmt = gsi_stmt (gsi2);
1397 gsi_remove (&gsi2, false);
1398 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1402 val = build_vector_from_val (type, val);
1405 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1406 init_stmt = gimple_build_assign (new_temp, val);
1407 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1408 return new_temp;
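/* Example of the generated code (SSA name is hypothetical): calling
   vect_init_vector with the scalar constant 3 and a four-element integer
   vector TYPE emits

     cst__5 = { 3, 3, 3, 3 };

   in the loop preheader (or before GSI) and returns cst__5.  */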
1412 /* Function vect_get_vec_defs_for_operand.
1414 OP is an operand in STMT_VINFO. This function returns a vector of
1415 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1417 In the case that OP is an SSA_NAME which is defined in the loop, then
1418 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1420 In case OP is an invariant or constant, a new stmt that creates a vector def
1421 needs to be introduced. VECTYPE may be used to specify a required type for
1422 vector invariant. */
1424 void
1425 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1426 unsigned ncopies,
1427 tree op, vec<tree> *vec_oprnds, tree vectype)
1429 gimple *def_stmt;
1430 enum vect_def_type dt;
1431 bool is_simple_use;
1432 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1434 if (dump_enabled_p ())
1435 dump_printf_loc (MSG_NOTE, vect_location,
1436 "vect_get_vec_defs_for_operand: %T\n", op);
1438 stmt_vec_info def_stmt_info;
1439 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1440 &def_stmt_info, &def_stmt);
1441 gcc_assert (is_simple_use);
1442 if (def_stmt && dump_enabled_p ())
1443 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1445 vec_oprnds->create (ncopies);
1446 if (dt == vect_constant_def || dt == vect_external_def)
1448 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1449 tree vector_type;
1451 if (vectype)
1452 vector_type = vectype;
1453 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1454 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1455 vector_type = truth_type_for (stmt_vectype);
1456 else
1457 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1459 gcc_assert (vector_type);
1460 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1461 while (ncopies--)
1462 vec_oprnds->quick_push (vop);
1464 else
1466 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1467 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1468 for (unsigned i = 0; i < ncopies; ++i)
1469 vec_oprnds->quick_push (gimple_get_lhs
1470 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
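/* Illustration: for a loop-invariant OP and NCOPIES == 3 the function
   builds one vector def via vect_init_vector and pushes that same def
   three times; for an OP defined inside the loop it collects the NCOPIES
   lhs values already recorded in STMT_VINFO_VEC_STMTS of the defining
   statement.  */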
1477 /* Get vectorized definitions for operands OP0 to OP3. */
1477 void
1478 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1479 unsigned ncopies,
1480 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1481 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1482 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1483 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1485 if (slp_node)
1487 if (op0)
1488 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1489 if (op1)
1490 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1491 if (op2)
1492 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1493 if (op3)
1494 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1496 else
1498 if (op0)
1499 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1500 op0, vec_oprnds0, vectype0);
1501 if (op1)
1502 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1503 op1, vec_oprnds1, vectype1);
1504 if (op2)
1505 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1506 op2, vec_oprnds2, vectype2);
1507 if (op3)
1508 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1509 op3, vec_oprnds3, vectype3);
1513 void
1514 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1515 unsigned ncopies,
1516 tree op0, vec<tree> *vec_oprnds0,
1517 tree op1, vec<tree> *vec_oprnds1,
1518 tree op2, vec<tree> *vec_oprnds2,
1519 tree op3, vec<tree> *vec_oprnds3)
1521 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1522 op0, vec_oprnds0, NULL_TREE,
1523 op1, vec_oprnds1, NULL_TREE,
1524 op2, vec_oprnds2, NULL_TREE,
1525 op3, vec_oprnds3, NULL_TREE);
1528 /* Helper function called by vect_finish_replace_stmt and
1529 vect_finish_stmt_generation. Set the location of the new statement
1530 and keep it in the EH region of STMT_INFO's statement, if any. */
1532 static void
1533 vect_finish_stmt_generation_1 (vec_info *,
1534 stmt_vec_info stmt_info, gimple *vec_stmt)
1536 if (dump_enabled_p ())
1537 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1539 if (stmt_info)
1541 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1543 /* While EH edges will generally prevent vectorization, stmt might
1544 e.g. be in a must-not-throw region. Ensure newly created stmts
1545 that could throw are part of the same region. */
1546 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1547 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1548 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1550 else
1551 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1554 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1555 which sets the same scalar result as STMT_INFO did, and finish its
1556 generation (location and EH information). */
1558 void
1559 vect_finish_replace_stmt (vec_info *vinfo,
1560 stmt_vec_info stmt_info, gimple *vec_stmt)
1562 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1563 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1565 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1566 gsi_replace (&gsi, vec_stmt, true);
1568 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1571 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1572 before *GSI, setting its location and EH information from STMT_INFO. */
1574 void
1575 vect_finish_stmt_generation (vec_info *vinfo,
1576 stmt_vec_info stmt_info, gimple *vec_stmt,
1577 gimple_stmt_iterator *gsi)
1579 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1581 if (!gsi_end_p (*gsi)
1582 && gimple_has_mem_ops (vec_stmt))
1584 gimple *at_stmt = gsi_stmt (*gsi);
1585 tree vuse = gimple_vuse (at_stmt);
1586 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1588 tree vdef = gimple_vdef (at_stmt);
1589 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1590 gimple_set_modified (vec_stmt, true);
1591 /* If we have an SSA vuse and insert a store, update virtual
1592 SSA form to avoid triggering the renamer. Do so only
1593 if we can easily see all uses - which is what almost always
1594 happens with the way vectorized stmts are inserted. */
1595 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1596 && ((is_gimple_assign (vec_stmt)
1597 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1598 || (is_gimple_call (vec_stmt)
1599 && !(gimple_call_flags (vec_stmt)
1600 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1602 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1603 gimple_set_vdef (vec_stmt, new_vdef);
1604 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1608 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1609 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1612 /* We want to vectorize a call to combined function CFN with function
1613 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1614 as the types of all inputs. Check whether this is possible using
1615 an internal function, returning its code if so or IFN_LAST if not. */
1617 static internal_fn
1618 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1619 tree vectype_out, tree vectype_in)
1621 internal_fn ifn;
1622 if (internal_fn_p (cfn))
1623 ifn = as_internal_fn (cfn);
1624 else
1625 ifn = associated_internal_fn (fndecl);
1626 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1628 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1629 if (info.vectorizable)
1631 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1632 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1633 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1634 OPTIMIZE_FOR_SPEED))
1635 return ifn;
1638 return IFN_LAST;
1642 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1643 gimple_stmt_iterator *);
1645 /* Check whether a load or store statement in the loop described by
1646 LOOP_VINFO is possible in a loop using partial vectors. This is
1647 testing whether the vectorizer pass has the appropriate support,
1648 as well as whether the target does.
1650 VLS_TYPE says whether the statement is a load or store and VECTYPE
1651 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1652 says how the load or store is going to be implemented and GROUP_SIZE
1653 is the number of load or store statements in the containing group.
1654 If the access is a gather load or scatter store, GS_INFO describes
1655 its arguments. If the load or store is conditional, SCALAR_MASK is the
1656 condition under which it occurs.
1658 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1659 vectors is not supported, otherwise record the required rgroup control
1660 types. */
1662 static void
1663 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1664 vec_load_store_type vls_type,
1665 int group_size,
1666 vect_memory_access_type
1667 memory_access_type,
1668 gather_scatter_info *gs_info,
1669 tree scalar_mask)
1671 /* Invariant loads need no special support. */
1672 if (memory_access_type == VMAT_INVARIANT)
1673 return;
1675 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1676 machine_mode vecmode = TYPE_MODE (vectype);
1677 bool is_load = (vls_type == VLS_LOAD);
1678 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1680 if (is_load
1681 ? !vect_load_lanes_supported (vectype, group_size, true)
1682 : !vect_store_lanes_supported (vectype, group_size, true))
1684 if (dump_enabled_p ())
1685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1686 "can't operate on partial vectors because"
1687 " the target doesn't have an appropriate"
1688 " load/store-lanes instruction.\n");
1689 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1690 return;
1692 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1693 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1694 return;
1697 if (memory_access_type == VMAT_GATHER_SCATTER)
1699 internal_fn ifn = (is_load
1700 ? IFN_MASK_GATHER_LOAD
1701 : IFN_MASK_SCATTER_STORE);
1702 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1703 gs_info->memory_type,
1704 gs_info->offset_vectype,
1705 gs_info->scale))
1707 if (dump_enabled_p ())
1708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1709 "can't operate on partial vectors because"
1710 " the target doesn't have an appropriate"
1711 " gather load or scatter store instruction.\n");
1712 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1713 return;
1715 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1716 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1717 return;
1720 if (memory_access_type != VMAT_CONTIGUOUS
1721 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1723 /* Element X of the data must come from iteration i * VF + X of the
1724 scalar loop. We need more work to support other mappings. */
1725 if (dump_enabled_p ())
1726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1727 "can't operate on partial vectors because an"
1728 " access isn't contiguous.\n");
1729 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1730 return;
1733 if (!VECTOR_MODE_P (vecmode))
1735 if (dump_enabled_p ())
1736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1737 "can't operate on partial vectors when emulating"
1738 " vector operations.\n");
1739 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1740 return;
1743 /* We might load more scalars than we need for permuting SLP loads.
1744 We checked in get_group_load_store_type that the extra elements
1745 don't leak into a new vector. */
1746 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1748 unsigned int nvectors;
1749 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1750 return nvectors;
1751 gcc_unreachable ();
1754 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1755 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1756 machine_mode mask_mode;
1757 bool using_partial_vectors_p = false;
1758 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1759 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1761 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1762 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1763 using_partial_vectors_p = true;
1766 machine_mode vmode;
1767 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1769 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1770 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1771 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1772 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1773 using_partial_vectors_p = true;
1776 if (!using_partial_vectors_p)
1778 if (dump_enabled_p ())
1779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1780 "can't operate on partial vectors because the"
1781 " target doesn't have the appropriate partial"
1782 " vectorization load or store.\n");
1783 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1787 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1788 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1789 that needs to be applied to all loads and stores in a vectorized loop.
1790 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1792 MASK_TYPE is the type of both masks. If new statements are needed,
1793 insert them before GSI. */
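/* For instance, if VEC_MASK is the vectorized form of a condition such as
b[i] != 0 and LOOP_MASK enables only the in-bounds lanes of a final partial
vector, the access ends up guarded by vec_mask_and = VEC_MASK & LOOP_MASK.  */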
1795 static tree
1796 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1797 gimple_stmt_iterator *gsi)
1799 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1800 if (!loop_mask)
1801 return vec_mask;
1803 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1804 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1805 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1806 vec_mask, loop_mask);
1807 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1808 return and_res;
1811 /* Determine whether we can use a gather load or scatter store to vectorize
1812 strided load or store STMT_INFO by truncating the current offset to a
1813 smaller width. We need to be able to construct an offset vector:
1815 { 0, X, X*2, X*3, ... }
1817 without loss of precision, where X is STMT_INFO's DR_STEP.
1819 Return true if this is possible, describing the gather load or scatter
1820 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
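/* A made-up example: if DR_STEP is 5, SCALE is 1 and the loop is known to
iterate at most 200 times, the largest offset is 200 * 5 == 1000, which fits
in 16 bits, so a gather or scatter with a 16-bit offset vector can be used
if the target provides one.  */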
1822 static bool
1823 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1824 loop_vec_info loop_vinfo, bool masked_p,
1825 gather_scatter_info *gs_info)
1827 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1828 data_reference *dr = dr_info->dr;
1829 tree step = DR_STEP (dr);
1830 if (TREE_CODE (step) != INTEGER_CST)
1832 /* ??? Perhaps we could use range information here? */
1833 if (dump_enabled_p ())
1834 dump_printf_loc (MSG_NOTE, vect_location,
1835 "cannot truncate variable step.\n");
1836 return false;
1839 /* Get the number of bits in an element. */
1840 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1841 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1842 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1844 /* Set COUNT to the upper limit on the number of elements - 1.
1845 Start with the maximum vectorization factor. */
1846 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1848 /* Try lowering COUNT to the number of scalar latch iterations. */
1849 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1850 widest_int max_iters;
1851 if (max_loop_iterations (loop, &max_iters)
1852 && max_iters < count)
1853 count = max_iters.to_shwi ();
1855 /* Try scales of 1 and the element size. */
1856 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1857 wi::overflow_type overflow = wi::OVF_NONE;
1858 for (int i = 0; i < 2; ++i)
1860 int scale = scales[i];
1861 widest_int factor;
1862 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1863 continue;
1865 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1866 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1867 if (overflow)
1868 continue;
1869 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1870 unsigned int min_offset_bits = wi::min_precision (range, sign);
1872 /* Find the narrowest viable offset type. */
1873 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1874 tree offset_type = build_nonstandard_integer_type (offset_bits,
1875 sign == UNSIGNED);
1877 /* See whether the target supports the operation with an offset
1878 no narrower than OFFSET_TYPE. */
1879 tree memory_type = TREE_TYPE (DR_REF (dr));
1880 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1881 vectype, memory_type, offset_type, scale,
1882 &gs_info->ifn, &gs_info->offset_vectype)
1883 || gs_info->ifn == IFN_LAST)
1884 continue;
1886 gs_info->decl = NULL_TREE;
1887 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1888 but we don't need to store that here. */
1889 gs_info->base = NULL_TREE;
1890 gs_info->element_type = TREE_TYPE (vectype);
1891 gs_info->offset = fold_convert (offset_type, step);
1892 gs_info->offset_dt = vect_constant_def;
1893 gs_info->scale = scale;
1894 gs_info->memory_type = memory_type;
1895 return true;
1898 if (overflow && dump_enabled_p ())
1899 dump_printf_loc (MSG_NOTE, vect_location,
1900 "truncating gather/scatter offset to %d bits"
1901 " might change its value.\n", element_bits);
1903 return false;
1906 /* Return true if we can use gather/scatter internal functions to
1907 vectorize STMT_INFO, which is a grouped or strided load or store.
1908 MASKED_P is true if the load or store is conditional. When returning
1909 true, fill in GS_INFO with the information required to perform the
1910 operation. */
1912 static bool
1913 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1914 loop_vec_info loop_vinfo, bool masked_p,
1915 gather_scatter_info *gs_info)
1917 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1918 || gs_info->ifn == IFN_LAST)
1919 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1920 masked_p, gs_info);
1922 tree old_offset_type = TREE_TYPE (gs_info->offset);
1923 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1925 gcc_assert (TYPE_PRECISION (new_offset_type)
1926 >= TYPE_PRECISION (old_offset_type));
1927 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1929 if (dump_enabled_p ())
1930 dump_printf_loc (MSG_NOTE, vect_location,
1931 "using gather/scatter for strided/grouped access,"
1932 " scale = %d\n", gs_info->scale);
1934 return true;
1937 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1938 elements with a known constant step. Return -1 if that step
1939 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1941 static int
1942 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1944 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1945 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1946 size_zero_node);
1949 /* If the target supports a permute mask that reverses the elements in
1950 a vector of type VECTYPE, return that mask, otherwise return null. */
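/* For example, for V4SI the required permutation is { 3, 2, 1, 0 }; the
three-element stepped encoding used below also covers variable-length
vector types.  */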
1952 static tree
1953 perm_mask_for_reverse (tree vectype)
1955 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1957 /* The encoding has a single stepped pattern. */
1958 vec_perm_builder sel (nunits, 1, 3);
1959 for (int i = 0; i < 3; ++i)
1960 sel.quick_push (nunits - 1 - i);
1962 vec_perm_indices indices (sel, 1, nunits);
1963 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1964 return NULL_TREE;
1965 return vect_gen_perm_mask_checked (vectype, indices);
1968 /* A subroutine of get_load_store_type, with a subset of the same
1969 arguments. Handle the case where STMT_INFO is a load or store that
1970 accesses consecutive elements with a negative step. */
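/* For example, a loop such as "for (i = n - 1; i >= 0; --i) a[i] = b[i];"
accesses consecutive elements with a negative step.  When the target can
reverse a vector we use a contiguous access plus a permute
(VMAT_CONTIGUOUS_REVERSE); otherwise we fall back to VMAT_ELEMENTWISE.  */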
1972 static vect_memory_access_type
1973 get_negative_load_store_type (vec_info *vinfo,
1974 stmt_vec_info stmt_info, tree vectype,
1975 vec_load_store_type vls_type,
1976 unsigned int ncopies)
1978 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1979 dr_alignment_support alignment_support_scheme;
1981 if (ncopies > 1)
1983 if (dump_enabled_p ())
1984 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1985 "multiple types with negative step.\n");
1986 return VMAT_ELEMENTWISE;
1989 alignment_support_scheme = vect_supportable_dr_alignment (vinfo, dr_info,
1990 vectype, false);
1991 if (alignment_support_scheme != dr_aligned
1992 && alignment_support_scheme != dr_unaligned_supported)
1994 if (dump_enabled_p ())
1995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1996 "negative step but alignment required.\n");
1997 return VMAT_ELEMENTWISE;
2000 if (vls_type == VLS_STORE_INVARIANT)
2002 if (dump_enabled_p ())
2003 dump_printf_loc (MSG_NOTE, vect_location,
2004 "negative step with invariant source;"
2005 " no permute needed.\n");
2006 return VMAT_CONTIGUOUS_DOWN;
2009 if (!perm_mask_for_reverse (vectype))
2011 if (dump_enabled_p ())
2012 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2013 "negative step and reversing not supported.\n");
2014 return VMAT_ELEMENTWISE;
2017 return VMAT_CONTIGUOUS_REVERSE;
2020 /* STMT_INFO is either a masked or unconditional store. Return the value
2021 being stored. */
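/* For a plain assignment this is simply the rhs; for an internal function
call such as IFN_MASK_STORE it is the argument selected by
internal_fn_stored_value_index (the last argument in that case).  */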
2023 tree
2024 vect_get_store_rhs (stmt_vec_info stmt_info)
2026 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2028 gcc_assert (gimple_assign_single_p (assign));
2029 return gimple_assign_rhs1 (assign);
2031 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2033 internal_fn ifn = gimple_call_internal_fn (call);
2034 int index = internal_fn_stored_value_index (ifn);
2035 gcc_assert (index >= 0);
2036 return gimple_call_arg (call, index);
2038 gcc_unreachable ();
2041 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2043 This function returns a vector type which can be composed from NELTS pieces,
2044 whose type is recorded in PTYPE. VTYPE should be a vector type, and the
2045 result has the same vector size as VTYPE. The function first checks whether
2046 the target supports constructing such a vector from vector-mode pieces of
2047 that size; if not, it checks whether an integer mode of the piece size can
2048 be used instead. It returns NULL_TREE if no suitable composition is found.
2050 For example, for (vtype=V16QI, nelts=4), we can probably get:
2051 - V16QI with PTYPE V4QI.
2052 - V4SI with PTYPE SI.
2053 - NULL_TREE. */
2055 static tree
2056 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2058 gcc_assert (VECTOR_TYPE_P (vtype));
2059 gcc_assert (known_gt (nelts, 0U));
2061 machine_mode vmode = TYPE_MODE (vtype);
2062 if (!VECTOR_MODE_P (vmode))
2063 return NULL_TREE;
2065 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2066 unsigned int pbsize;
2067 if (constant_multiple_p (vbsize, nelts, &pbsize))
2069 /* First check if vec_init optab supports construction from
2070 vector pieces directly. */
2071 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2072 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2073 machine_mode rmode;
2074 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2075 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2076 != CODE_FOR_nothing))
2078 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2079 return vtype;
2082 /* Otherwise check if exists an integer type of the same piece size and
2083 if vec_init optab supports construction from it directly. */
2084 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2085 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2086 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2087 != CODE_FOR_nothing))
2089 *ptype = build_nonstandard_integer_type (pbsize, 1);
2090 return build_vector_type (*ptype, nelts);
2094 return NULL_TREE;
2097 /* A subroutine of get_load_store_type, with a subset of the same
2098 arguments. Handle the case where STMT_INFO is part of a grouped load
2099 or store.
2101 For stores, the statements in the group are all consecutive
2102 and there is no gap at the end. For loads, the statements in the
2103 group might not be consecutive; there can be gaps between statements
2104 as well as at the end. */
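/* For example, loads of a[2*i] and a[2*i+1] in the same iteration form a
group of two consecutive loads; depending on target support, the code below
implements such a group with a load-lanes instruction
(VMAT_LOAD_STORE_LANES), contiguous loads plus permutes
(VMAT_CONTIGUOUS_PERMUTE), or elementwise accesses (VMAT_ELEMENTWISE).  */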
2106 static bool
2107 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2108 tree vectype, slp_tree slp_node,
2109 bool masked_p, vec_load_store_type vls_type,
2110 vect_memory_access_type *memory_access_type,
2111 dr_alignment_support *alignment_support_scheme,
2112 gather_scatter_info *gs_info)
2114 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2115 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2116 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2117 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2118 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2119 bool single_element_p = (stmt_info == first_stmt_info
2120 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2121 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2122 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2124 /* True if the vectorized statements would access beyond the last
2125 statement in the group. */
2126 bool overrun_p = false;
2128 /* True if we can cope with such overrun by peeling for gaps, so that
2129 there is at least one final scalar iteration after the vector loop. */
2130 bool can_overrun_p = (!masked_p
2131 && vls_type == VLS_LOAD
2132 && loop_vinfo
2133 && !loop->inner);
2135 /* There can only be a gap at the end of the group if the stride is
2136 known at compile time. */
2137 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2139 /* Stores can't yet have gaps. */
2140 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2142 if (slp_node)
2144 /* For SLP vectorization we directly vectorize a subchain
2145 without permutation. */
2146 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2147 first_dr_info
2148 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2149 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2151 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2152 separated by the stride, until we have a complete vector.
2153 Fall back to scalar accesses if that isn't possible. */
2154 if (multiple_p (nunits, group_size))
2155 *memory_access_type = VMAT_STRIDED_SLP;
2156 else
2157 *memory_access_type = VMAT_ELEMENTWISE;
2159 else
2161 overrun_p = loop_vinfo && gap != 0;
2162 if (overrun_p && vls_type != VLS_LOAD)
2164 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2165 "Grouped store with gaps requires"
2166 " non-consecutive accesses\n");
2167 return false;
2169 /* An overrun is fine if the trailing elements are smaller
2170 than the alignment boundary B. Every vector access will
2171 be a multiple of B and so we are guaranteed to access a
2172 non-gap element in the same B-sized block. */
2173 if (overrun_p
2174 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2175 vectype)
2176 / vect_get_scalar_dr_size (first_dr_info)))
2177 overrun_p = false;
2179 /* If the gap splits the vector in half and the target
2180 can do half-vector operations, avoid the epilogue peeling
2181 by simply loading half of the vector only. Usually
2182 the construction with an upper zero half will be elided. */
2183 dr_alignment_support alignment_support_scheme;
2184 tree half_vtype;
2185 if (overrun_p
2186 && !masked_p
2187 && (((alignment_support_scheme
2188 = vect_supportable_dr_alignment (vinfo, first_dr_info,
2189 vectype, false)))
2190 == dr_aligned
2191 || alignment_support_scheme == dr_unaligned_supported)
2192 && known_eq (nunits, (group_size - gap) * 2)
2193 && known_eq (nunits, group_size)
2194 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2195 != NULL_TREE))
2196 overrun_p = false;
2198 if (overrun_p && !can_overrun_p)
2200 if (dump_enabled_p ())
2201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2202 "Peeling for outer loop is not supported\n");
2203 return false;
2205 int cmp = compare_step_with_zero (vinfo, stmt_info);
2206 if (cmp < 0)
2208 if (single_element_p)
2209 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2210 only correct for single element "interleaving" SLP. */
2211 *memory_access_type = get_negative_load_store_type
2212 (vinfo, stmt_info, vectype, vls_type, 1);
2213 else
2215 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2216 separated by the stride, until we have a complete vector.
2217 Fall back to scalar accesses if that isn't possible. */
2218 if (multiple_p (nunits, group_size))
2219 *memory_access_type = VMAT_STRIDED_SLP;
2220 else
2221 *memory_access_type = VMAT_ELEMENTWISE;
2224 else
2226 gcc_assert (!loop_vinfo || cmp > 0);
2227 *memory_access_type = VMAT_CONTIGUOUS;
2231 else
2233 /* We can always handle this case using elementwise accesses,
2234 but see if something more efficient is available. */
2235 *memory_access_type = VMAT_ELEMENTWISE;
2237 /* If there is a gap at the end of the group then these optimizations
2238 would access excess elements in the last iteration. */
2239 bool would_overrun_p = (gap != 0);
2240 /* An overrun is fine if the trailing elements are smaller than the
2241 alignment boundary B. Every vector access will be a multiple of B
2242 and so we are guaranteed to access a non-gap element in the
2243 same B-sized block. */
2244 if (would_overrun_p
2245 && !masked_p
2246 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2247 / vect_get_scalar_dr_size (first_dr_info)))
2248 would_overrun_p = false;
2250 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2251 && (can_overrun_p || !would_overrun_p)
2252 && compare_step_with_zero (vinfo, stmt_info) > 0)
2254 /* First cope with the degenerate case of a single-element
2255 vector. */
2256 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2259 /* Otherwise try using LOAD/STORE_LANES. */
2260 else if (vls_type == VLS_LOAD
2261 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2262 : vect_store_lanes_supported (vectype, group_size,
2263 masked_p))
2265 *memory_access_type = VMAT_LOAD_STORE_LANES;
2266 overrun_p = would_overrun_p;
2269 /* If that fails, try using permuting loads. */
2270 else if (vls_type == VLS_LOAD
2271 ? vect_grouped_load_supported (vectype, single_element_p,
2272 group_size)
2273 : vect_grouped_store_supported (vectype, group_size))
2275 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2276 overrun_p = would_overrun_p;
2280 /* As a last resort, try using a gather load or scatter store.
2282 ??? Although the code can handle all group sizes correctly,
2283 it probably isn't a win to use separate strided accesses based
2284 on nearby locations. Or, even if it's a win over scalar code,
2285 it might not be a win over vectorizing at a lower VF, if that
2286 allows us to use contiguous accesses. */
2287 if (*memory_access_type == VMAT_ELEMENTWISE
2288 && single_element_p
2289 && loop_vinfo
2290 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2291 masked_p, gs_info))
2292 *memory_access_type = VMAT_GATHER_SCATTER;
2295 if (*memory_access_type == VMAT_GATHER_SCATTER
2296 || *memory_access_type == VMAT_ELEMENTWISE)
2297 *alignment_support_scheme = dr_unaligned_supported;
2298 else
2299 *alignment_support_scheme
2300 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, false);
2302 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2304 /* STMT is the leader of the group. Check the operands of all the
2305 stmts of the group. */
2306 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2307 while (next_stmt_info)
2309 tree op = vect_get_store_rhs (next_stmt_info);
2310 enum vect_def_type dt;
2311 if (!vect_is_simple_use (op, vinfo, &dt))
2313 if (dump_enabled_p ())
2314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2315 "use not simple.\n");
2316 return false;
2318 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2322 if (overrun_p)
2324 gcc_assert (can_overrun_p);
2325 if (dump_enabled_p ())
2326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2327 "Data access with gaps requires scalar "
2328 "epilogue loop\n");
2329 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2332 return true;
2335 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2336 if there is a memory access type that the vectorized form can use,
2337 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2338 or scatters, fill in GS_INFO accordingly. In addition
2339 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2340 the target does not support the alignment scheme.
2342 SLP says whether we're performing SLP rather than loop vectorization.
2343 MASKED_P is true if the statement is conditional on a vectorized mask.
2344 VECTYPE is the vector type that the vectorized statements will use.
2345 NCOPIES is the number of vector statements that will be needed. */
2347 static bool
2348 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2349 tree vectype, slp_tree slp_node,
2350 bool masked_p, vec_load_store_type vls_type,
2351 unsigned int ncopies,
2352 vect_memory_access_type *memory_access_type,
2353 dr_alignment_support *alignment_support_scheme,
2354 gather_scatter_info *gs_info)
2356 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2357 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2358 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2360 *memory_access_type = VMAT_GATHER_SCATTER;
2361 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2362 gcc_unreachable ();
2363 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2364 &gs_info->offset_dt,
2365 &gs_info->offset_vectype))
2367 if (dump_enabled_p ())
2368 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2369 "%s index use not simple.\n",
2370 vls_type == VLS_LOAD ? "gather" : "scatter");
2371 return false;
2373 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2375 if (vls_type != VLS_LOAD)
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2379 "unsupported emulated scatter.\n");
2380 return false;
2382 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2383 || !TYPE_VECTOR_SUBPARTS
2384 (gs_info->offset_vectype).is_constant ()
2385 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2386 (gs_info->offset_vectype),
2387 TYPE_VECTOR_SUBPARTS (vectype)))
2389 if (dump_enabled_p ())
2390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2391 "unsupported vector types for emulated "
2392 "gather.\n");
2393 return false;
2396 /* Gather-scatter accesses perform only component accesses, alignment
2397 is irrelevant for them. */
2398 *alignment_support_scheme = dr_unaligned_supported;
2400 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2402 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2403 masked_p,
2404 vls_type, memory_access_type,
2405 alignment_support_scheme, gs_info))
2406 return false;
2408 else if (STMT_VINFO_STRIDED_P (stmt_info))
2410 gcc_assert (!slp_node);
2411 if (loop_vinfo
2412 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2413 masked_p, gs_info))
2414 *memory_access_type = VMAT_GATHER_SCATTER;
2415 else
2416 *memory_access_type = VMAT_ELEMENTWISE;
2417 /* Alignment is irrelevant here. */
2418 *alignment_support_scheme = dr_unaligned_supported;
2420 else
2422 int cmp = compare_step_with_zero (vinfo, stmt_info);
2423 if (cmp == 0)
2425 gcc_assert (vls_type == VLS_LOAD);
2426 *memory_access_type = VMAT_INVARIANT;
2427 /* Invariant accesses perform only component accesses, alignment
2428 is irrelevant for them. */
2429 *alignment_support_scheme = dr_unaligned_supported;
2431 else
2433 if (cmp < 0)
2434 *memory_access_type = get_negative_load_store_type
2435 (vinfo, stmt_info, vectype, vls_type, ncopies);
2436 else
2437 *memory_access_type = VMAT_CONTIGUOUS;
2438 *alignment_support_scheme
2439 = vect_supportable_dr_alignment (vinfo,
2440 STMT_VINFO_DR_INFO (stmt_info),
2441 vectype, false);
2445 if ((*memory_access_type == VMAT_ELEMENTWISE
2446 || *memory_access_type == VMAT_STRIDED_SLP)
2447 && !nunits.is_constant ())
2449 if (dump_enabled_p ())
2450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2451 "Not using elementwise accesses due to variable "
2452 "vectorization factor.\n");
2453 return false;
2456 if (*alignment_support_scheme == dr_unaligned_unsupported)
2458 if (dump_enabled_p ())
2459 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2460 "unsupported unaligned access\n");
2461 return false;
2464 /* FIXME: At the moment the cost model seems to underestimate the
2465 cost of using elementwise accesses. This check preserves the
2466 traditional behavior until that can be fixed. */
2467 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2468 if (!first_stmt_info)
2469 first_stmt_info = stmt_info;
2470 if (*memory_access_type == VMAT_ELEMENTWISE
2471 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2472 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2473 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2474 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2476 if (dump_enabled_p ())
2477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2478 "not falling back to elementwise accesses\n");
2479 return false;
2481 return true;
2484 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2485 conditional operation STMT_INFO. When returning true, store the mask
2486 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2487 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2488 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2490 static bool
2491 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2492 slp_tree slp_node, unsigned mask_index,
2493 tree *mask, slp_tree *mask_node,
2494 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2496 enum vect_def_type mask_dt;
2497 tree mask_vectype;
2498 slp_tree mask_node_1;
2499 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2500 mask, &mask_node_1, &mask_dt, &mask_vectype))
2502 if (dump_enabled_p ())
2503 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2504 "mask use not simple.\n");
2505 return false;
2508 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2510 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2512 "mask argument is not a boolean.\n");
2513 return false;
2516 /* If the caller is not prepared to adjust an external/constant
2517 SLP mask vector type, fail. */
2518 if (slp_node
2519 && !mask_node
2520 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2522 if (dump_enabled_p ())
2523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2524 "SLP mask argument is not vectorized.\n");
2525 return false;
2528 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2529 if (!mask_vectype)
2530 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2532 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2534 if (dump_enabled_p ())
2535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2536 "could not find an appropriate vector mask type.\n");
2537 return false;
2540 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2541 TYPE_VECTOR_SUBPARTS (vectype)))
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 "vector mask type %T"
2546 " does not match vector data type %T.\n",
2547 mask_vectype, vectype);
2549 return false;
2552 *mask_dt_out = mask_dt;
2553 *mask_vectype_out = mask_vectype;
2554 if (mask_node)
2555 *mask_node = mask_node_1;
2556 return true;
2559 /* Return true if stored value RHS is suitable for vectorizing store
2560 statement STMT_INFO. When returning true, store the type of the
2561 definition in *RHS_DT_OUT, the type of the vectorized store value in
2562 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2564 static bool
2565 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2566 slp_tree slp_node, tree rhs,
2567 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2568 vec_load_store_type *vls_type_out)
2570 /* If this is a store from a constant, make sure
2571 native_encode_expr can handle it. */
2572 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2574 if (dump_enabled_p ())
2575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2576 "cannot encode constant as a byte sequence.\n");
2577 return false;
2580 unsigned op_no = 0;
2581 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2583 if (gimple_call_internal_p (call)
2584 && internal_store_fn_p (gimple_call_internal_fn (call)))
2585 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2588 enum vect_def_type rhs_dt;
2589 tree rhs_vectype;
2590 slp_tree slp_op;
2591 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2592 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2594 if (dump_enabled_p ())
2595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2596 "use not simple.\n");
2597 return false;
2600 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2601 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2603 if (dump_enabled_p ())
2604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2605 "incompatible vector types.\n");
2606 return false;
2609 *rhs_dt_out = rhs_dt;
2610 *rhs_vectype_out = rhs_vectype;
2611 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2612 *vls_type_out = VLS_STORE_INVARIANT;
2613 else
2614 *vls_type_out = VLS_STORE;
2615 return true;
2618 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2619 Note that we support masks with floating-point type, in which case the
2620 floats are interpreted as a bitmask. */
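/* The floating-point case builds a constant whose target encoding is all
one bits in every element, so reinterpreting the mask as an integer vector
gives the expected all-ones pattern.  */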
2622 static tree
2623 vect_build_all_ones_mask (vec_info *vinfo,
2624 stmt_vec_info stmt_info, tree masktype)
2626 if (TREE_CODE (masktype) == INTEGER_TYPE)
2627 return build_int_cst (masktype, -1);
2628 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2630 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2631 mask = build_vector_from_val (masktype, mask);
2632 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2634 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2636 REAL_VALUE_TYPE r;
2637 long tmp[6];
2638 for (int j = 0; j < 6; ++j)
2639 tmp[j] = -1;
2640 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2641 tree mask = build_real (TREE_TYPE (masktype), r);
2642 mask = build_vector_from_val (masktype, mask);
2643 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2645 gcc_unreachable ();
2648 /* Build an all-zero merge value of type VECTYPE while vectorizing
2649 STMT_INFO as a gather load. */
2651 static tree
2652 vect_build_zero_merge_argument (vec_info *vinfo,
2653 stmt_vec_info stmt_info, tree vectype)
2655 tree merge;
2656 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2657 merge = build_int_cst (TREE_TYPE (vectype), 0);
2658 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2660 REAL_VALUE_TYPE r;
2661 long tmp[6];
2662 for (int j = 0; j < 6; ++j)
2663 tmp[j] = 0;
2664 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2665 merge = build_real (TREE_TYPE (vectype), r);
2667 else
2668 gcc_unreachable ();
2669 merge = build_vector_from_val (vectype, merge);
2670 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2673 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2674 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2675 the gather load operation. If the load is conditional, MASK is the
2676 unvectorized condition and MASK_DT is its definition type, otherwise
2677 MASK is null. */
2679 static void
2680 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2681 gimple_stmt_iterator *gsi,
2682 gimple **vec_stmt,
2683 gather_scatter_info *gs_info,
2684 tree mask)
2686 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2687 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2688 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2689 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2690 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2691 edge pe = loop_preheader_edge (loop);
2692 enum { NARROW, NONE, WIDEN } modifier;
2693 poly_uint64 gather_off_nunits
2694 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2696 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2697 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2698 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2699 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2700 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2701 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2702 tree scaletype = TREE_VALUE (arglist);
2703 tree real_masktype = masktype;
2704 gcc_checking_assert (types_compatible_p (srctype, rettype)
2705 && (!mask
2706 || TREE_CODE (masktype) == INTEGER_TYPE
2707 || types_compatible_p (srctype, masktype)));
2708 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2709 masktype = truth_type_for (srctype);
2711 tree mask_halftype = masktype;
2712 tree perm_mask = NULL_TREE;
2713 tree mask_perm_mask = NULL_TREE;
2714 if (known_eq (nunits, gather_off_nunits))
2715 modifier = NONE;
2716 else if (known_eq (nunits * 2, gather_off_nunits))
2718 modifier = WIDEN;
2720 /* Currently widening gathers and scatters are only supported for
2721 fixed-length vectors. */
2722 int count = gather_off_nunits.to_constant ();
2723 vec_perm_builder sel (count, count, 1);
2724 for (int i = 0; i < count; ++i)
2725 sel.quick_push (i | (count / 2));
2727 vec_perm_indices indices (sel, 1, count);
2728 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2729 indices);
2731 else if (known_eq (nunits, gather_off_nunits * 2))
2733 modifier = NARROW;
2735 /* Currently narrowing gathers and scatters are only supported for
2736 fixed-length vectors. */
2737 int count = nunits.to_constant ();
2738 vec_perm_builder sel (count, count, 1);
2739 sel.quick_grow (count);
2740 for (int i = 0; i < count; ++i)
2741 sel[i] = i < count / 2 ? i : i + count / 2;
2742 vec_perm_indices indices (sel, 2, count);
2743 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2745 ncopies *= 2;
2747 if (mask && masktype == real_masktype)
2749 for (int i = 0; i < count; ++i)
2750 sel[i] = i | (count / 2);
2751 indices.new_vector (sel, 2, count);
2752 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2754 else if (mask)
2755 mask_halftype = truth_type_for (gs_info->offset_vectype);
2757 else
2758 gcc_unreachable ();
2760 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2761 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2763 tree ptr = fold_convert (ptrtype, gs_info->base);
2764 if (!is_gimple_min_invariant (ptr))
2766 gimple_seq seq;
2767 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2768 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2769 gcc_assert (!new_bb);
2772 tree scale = build_int_cst (scaletype, gs_info->scale);
2774 tree vec_oprnd0 = NULL_TREE;
2775 tree vec_mask = NULL_TREE;
2776 tree src_op = NULL_TREE;
2777 tree mask_op = NULL_TREE;
2778 tree prev_res = NULL_TREE;
2780 if (!mask)
2782 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2783 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2786 auto_vec<tree> vec_oprnds0;
2787 auto_vec<tree> vec_masks;
2788 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2789 modifier == WIDEN ? ncopies / 2 : ncopies,
2790 gs_info->offset, &vec_oprnds0);
2791 if (mask)
2792 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2793 modifier == NARROW ? ncopies / 2 : ncopies,
2794 mask, &vec_masks);
2795 for (int j = 0; j < ncopies; ++j)
2797 tree op, var;
2798 if (modifier == WIDEN && (j & 1))
2799 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2800 perm_mask, stmt_info, gsi);
2801 else
2802 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2804 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2806 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2807 TYPE_VECTOR_SUBPARTS (idxtype)));
2808 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2809 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2810 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2811 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2812 op = var;
2815 if (mask)
2817 if (mask_perm_mask && (j & 1))
2818 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2819 mask_perm_mask, stmt_info, gsi);
2820 else
2822 if (modifier == NARROW)
2824 if ((j & 1) == 0)
2825 vec_mask = vec_masks[j / 2];
2827 else
2828 vec_mask = vec_masks[j];
2830 mask_op = vec_mask;
2831 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2833 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2834 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2835 gcc_assert (known_eq (sub1, sub2));
2836 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2837 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2838 gassign *new_stmt
2839 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2840 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2841 mask_op = var;
2844 if (modifier == NARROW && masktype != real_masktype)
2846 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2847 gassign *new_stmt
2848 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2849 : VEC_UNPACK_LO_EXPR,
2850 mask_op);
2851 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2852 mask_op = var;
2854 src_op = mask_op;
2857 tree mask_arg = mask_op;
2858 if (masktype != real_masktype)
2860 tree utype, optype = TREE_TYPE (mask_op);
2861 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2862 utype = real_masktype;
2863 else
2864 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2865 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2866 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2867 gassign *new_stmt
2868 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2869 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2870 mask_arg = var;
2871 if (!useless_type_conversion_p (real_masktype, utype))
2873 gcc_assert (TYPE_PRECISION (utype)
2874 <= TYPE_PRECISION (real_masktype));
2875 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2876 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2877 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2878 mask_arg = var;
2880 src_op = build_zero_cst (srctype);
2882 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2883 mask_arg, scale);
2885 if (!useless_type_conversion_p (vectype, rettype))
2887 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2888 TYPE_VECTOR_SUBPARTS (rettype)));
2889 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2890 gimple_call_set_lhs (new_stmt, op);
2891 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2892 var = make_ssa_name (vec_dest);
2893 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2894 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2895 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2897 else
2899 var = make_ssa_name (vec_dest, new_stmt);
2900 gimple_call_set_lhs (new_stmt, var);
2901 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2904 if (modifier == NARROW)
2906 if ((j & 1) == 0)
2908 prev_res = var;
2909 continue;
2911 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2912 stmt_info, gsi);
2913 new_stmt = SSA_NAME_DEF_STMT (var);
2916 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2918 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2921 /* Prepare the base and offset in GS_INFO for vectorization.
2922 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2923 to the vectorized offset argument for the first copy of STMT_INFO.
2924 STMT_INFO is the statement described by GS_INFO and LOOP is the
2925 containing loop. */
2927 static void
2928 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2929 class loop *loop, stmt_vec_info stmt_info,
2930 gather_scatter_info *gs_info,
2931 tree *dataref_ptr, vec<tree> *vec_offset)
2933 gimple_seq stmts = NULL;
2934 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2935 if (stmts != NULL)
2937 basic_block new_bb;
2938 edge pe = loop_preheader_edge (loop);
2939 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2940 gcc_assert (!new_bb);
2942 unsigned ncopies = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2943 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2944 gs_info->offset, vec_offset,
2945 gs_info->offset_vectype);
2948 /* Prepare to implement a grouped or strided load or store using
2949 the gather load or scatter store operation described by GS_INFO.
2950 STMT_INFO is the load or store statement.
2952 Set *DATAREF_BUMP to the amount that should be added to the base
2953 address after each copy of the vectorized statement. Set *VEC_OFFSET
2954 to an invariant offset vector in which element I has the value
2955 I * DR_STEP / SCALE. */
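/* An illustrative example: for a V4SI access with DR_STEP 32 and SCALE 4,
*DATAREF_BUMP is 32 * 4 == 128 bytes per copy and *VEC_OFFSET is the series
{ 0, 8, 16, 24 }.  */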
2957 static void
2958 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2959 loop_vec_info loop_vinfo,
2960 gather_scatter_info *gs_info,
2961 tree *dataref_bump, tree *vec_offset)
2963 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2964 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2966 tree bump = size_binop (MULT_EXPR,
2967 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2968 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2969 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2971 /* The offset given in GS_INFO can have pointer type, so use the element
2972 type of the vector instead. */
2973 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2975 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2976 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2977 ssize_int (gs_info->scale));
2978 step = fold_convert (offset_type, step);
2980 /* Create {0, X, X*2, X*3, ...}. */
2981 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2982 build_zero_cst (offset_type), step);
2983 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2986 /* Return the amount that should be added to a vector pointer to move
2987 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2988 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2989 vectorization. */
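/* For instance, for a contiguous V8HI access the increment is 16 bytes,
negated when the scalar step is negative; invariant accesses use an
increment of zero.  */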
2991 static tree
2992 vect_get_data_ptr_increment (vec_info *vinfo,
2993 dr_vec_info *dr_info, tree aggr_type,
2994 vect_memory_access_type memory_access_type)
2996 if (memory_access_type == VMAT_INVARIANT)
2997 return size_zero_node;
2999 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3000 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3001 if (tree_int_cst_sgn (step) == -1)
3002 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3003 return iv_step;
3006 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
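/* For example, vectorizing __builtin_bswap32 on V4SI goes through V16QI
with the byte permutation
{ 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */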
3008 static bool
3009 vectorizable_bswap (vec_info *vinfo,
3010 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3011 gimple **vec_stmt, slp_tree slp_node,
3012 slp_tree *slp_op,
3013 tree vectype_in, stmt_vector_for_cost *cost_vec)
3015 tree op, vectype;
3016 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3017 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3018 unsigned ncopies;
3020 op = gimple_call_arg (stmt, 0);
3021 vectype = STMT_VINFO_VECTYPE (stmt_info);
3022 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3024 /* Multiple types in SLP are handled by creating the appropriate number of
3025 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3026 case of SLP. */
3027 if (slp_node)
3028 ncopies = 1;
3029 else
3030 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3032 gcc_assert (ncopies >= 1);
3034 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3035 if (! char_vectype)
3036 return false;
3038 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3039 unsigned word_bytes;
3040 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3041 return false;
3043 /* The encoding uses one stepped pattern for each byte in the word. */
3044 vec_perm_builder elts (num_bytes, word_bytes, 3);
3045 for (unsigned i = 0; i < 3; ++i)
3046 for (unsigned j = 0; j < word_bytes; ++j)
3047 elts.quick_push ((i + 1) * word_bytes - j - 1);
3049 vec_perm_indices indices (elts, 1, num_bytes);
3050 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3051 return false;
3053 if (! vec_stmt)
3055 if (slp_node
3056 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3058 if (dump_enabled_p ())
3059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3060 "incompatible vector types for invariants\n");
3061 return false;
3064 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3065 DUMP_VECT_SCOPE ("vectorizable_bswap");
3066 record_stmt_cost (cost_vec,
3067 1, vector_stmt, stmt_info, 0, vect_prologue);
3068 record_stmt_cost (cost_vec,
3069 slp_node
3070 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3071 vec_perm, stmt_info, 0, vect_body);
3072 return true;
3075 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3077 /* Transform. */
3078 vec<tree> vec_oprnds = vNULL;
3079 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3080 op, &vec_oprnds);
3081 /* Arguments are ready. Create the new vector stmt. */
3082 unsigned i;
3083 tree vop;
3084 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3086 gimple *new_stmt;
3087 tree tem = make_ssa_name (char_vectype);
3088 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3089 char_vectype, vop));
3090 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3091 tree tem2 = make_ssa_name (char_vectype);
3092 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3093 tem, tem, bswap_vconst);
3094 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3095 tem = make_ssa_name (vectype);
3096 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3097 vectype, tem2));
3098 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3099 if (slp_node)
3100 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3101 else
3102 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3105 if (!slp_node)
3106 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3108 vec_oprnds.release ();
3109 return true;
3112 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3113 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3114 in a single step. On success, store the binary pack code in
3115 *CONVERT_CODE. */
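/* For example, narrowing V4SI results to V8HI can be done with a single
VEC_PACK_TRUNC_EXPR, whereas V4SI to V16QI would need two steps and is
therefore rejected here.  */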
3117 static bool
3118 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3119 tree_code *convert_code)
3121 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3122 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3123 return false;
3125 tree_code code;
3126 int multi_step_cvt = 0;
3127 auto_vec <tree, 8> interm_types;
3128 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3129 &code, &multi_step_cvt, &interm_types)
3130 || multi_step_cvt)
3131 return false;
3133 *convert_code = code;
3134 return true;
3137 /* Function vectorizable_call.
3139 Check if STMT_INFO performs a function call that can be vectorized.
3140 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3141 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3142 Return true if STMT_INFO is vectorizable in this way. */
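/* For instance, a call to sqrt in the loop body can be vectorized as a
single IFN_SQRT call on the vector type when the target supports it
directly, or via a target builtin returned by
targetm.vectorize.builtin_vectorized_function.  */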
3144 static bool
3145 vectorizable_call (vec_info *vinfo,
3146 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3147 gimple **vec_stmt, slp_tree slp_node,
3148 stmt_vector_for_cost *cost_vec)
3150 gcall *stmt;
3151 tree vec_dest;
3152 tree scalar_dest;
3153 tree op;
3154 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3155 tree vectype_out, vectype_in;
3156 poly_uint64 nunits_in;
3157 poly_uint64 nunits_out;
3158 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3159 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3160 tree fndecl, new_temp, rhs_type;
3161 enum vect_def_type dt[4]
3162 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3163 vect_unknown_def_type };
3164 tree vectypes[ARRAY_SIZE (dt)] = {};
3165 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3166 int ndts = ARRAY_SIZE (dt);
3167 int ncopies, j;
3168 auto_vec<tree, 8> vargs;
3169 auto_vec<tree, 8> orig_vargs;
3170 enum { NARROW, NONE, WIDEN } modifier;
3171 size_t i, nargs;
3172 tree lhs;
3174 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3175 return false;
3177 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3178 && ! vec_stmt)
3179 return false;
3181 /* Is STMT_INFO a vectorizable call? */
3182 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3183 if (!stmt)
3184 return false;
3186 if (gimple_call_internal_p (stmt)
3187 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3188 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3189 /* Handled by vectorizable_load and vectorizable_store. */
3190 return false;
3192 if (gimple_call_lhs (stmt) == NULL_TREE
3193 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3194 return false;
3196 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3198 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3200 /* Process function arguments. */
3201 rhs_type = NULL_TREE;
3202 vectype_in = NULL_TREE;
3203 nargs = gimple_call_num_args (stmt);
3205 /* Bail out if the function has more than four arguments; we do not have
3206 interesting builtin functions to vectorize with more than two arguments
3207 except for fma. No arguments is also not good. */
3208 if (nargs == 0 || nargs > 4)
3209 return false;
3211 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3212 combined_fn cfn = gimple_call_combined_fn (stmt);
3213 if (cfn == CFN_GOMP_SIMD_LANE)
3215 nargs = 0;
3216 rhs_type = unsigned_type_node;
3219 int mask_opno = -1;
3220 if (internal_fn_p (cfn))
3221 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3223 for (i = 0; i < nargs; i++)
3225 if ((int) i == mask_opno)
3227 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3228 &op, &slp_op[i], &dt[i], &vectypes[i]))
3229 return false;
3230 continue;
3233 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3234 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3236 if (dump_enabled_p ())
3237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3238 "use not simple.\n");
3239 return false;
3242 /* We can only handle calls with arguments of the same type. */
3243 if (rhs_type
3244 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3246 if (dump_enabled_p ())
3247 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3248 "argument types differ.\n");
3249 return false;
3251 if (!rhs_type)
3252 rhs_type = TREE_TYPE (op);
3254 if (!vectype_in)
3255 vectype_in = vectypes[i];
3256 else if (vectypes[i]
3257 && !types_compatible_p (vectypes[i], vectype_in))
3259 if (dump_enabled_p ())
3260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3261 "argument vector types differ.\n");
3262 return false;
3265 /* If all arguments are external or constant defs, infer the vector type
3266 from the scalar type. */
3267 if (!vectype_in)
3268 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3269 if (vec_stmt)
3270 gcc_assert (vectype_in);
3271 if (!vectype_in)
3273 if (dump_enabled_p ())
3274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3275 "no vectype for scalar type %T\n", rhs_type);
3277 return false;
3279 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3280 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3281 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3282 by a pack of the two vectors into an SI vector. We would need
3283 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3284 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3286 if (dump_enabled_p ())
3287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3288 "mismatched vector sizes %T and %T\n",
3289 vectype_in, vectype_out);
3290 return false;
3293 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3294 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3296 if (dump_enabled_p ())
3297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3298 "mixed mask and nonmask vector types\n");
3299 return false;
3302 /* FORNOW */
3303 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3304 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3305 if (known_eq (nunits_in * 2, nunits_out))
3306 modifier = NARROW;
3307 else if (known_eq (nunits_out, nunits_in))
3308 modifier = NONE;
3309 else if (known_eq (nunits_out * 2, nunits_in))
3310 modifier = WIDEN;
3311 else
3312 return false;
3314 /* We only handle functions that do not read or clobber memory. */
3315 if (gimple_vuse (stmt))
3317 if (dump_enabled_p ())
3318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3319 "function reads from or writes to memory.\n");
3320 return false;
3323 /* For now, we only vectorize functions if a target specific builtin
3324 is available. TODO -- in some cases, it might be profitable to
3325 insert the calls for pieces of the vector, in order to be able
3326 to vectorize other operations in the loop. */
3327 fndecl = NULL_TREE;
3328 internal_fn ifn = IFN_LAST;
3329 tree callee = gimple_call_fndecl (stmt);
3331 /* First try using an internal function. */
3332 tree_code convert_code = ERROR_MARK;
3333 if (cfn != CFN_LAST
3334 && (modifier == NONE
3335 || (modifier == NARROW
3336 && simple_integer_narrowing (vectype_out, vectype_in,
3337 &convert_code))))
3338 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3339 vectype_in);
3341 /* If that fails, try asking for a target-specific built-in function. */
3342 if (ifn == IFN_LAST)
3344 if (cfn != CFN_LAST)
3345 fndecl = targetm.vectorize.builtin_vectorized_function
3346 (cfn, vectype_out, vectype_in);
3347 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3348 fndecl = targetm.vectorize.builtin_md_vectorized_function
3349 (callee, vectype_out, vectype_in);
3352 if (ifn == IFN_LAST && !fndecl)
3354 if (cfn == CFN_GOMP_SIMD_LANE
3355 && !slp_node
3356 && loop_vinfo
3357 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3358 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3359 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3360 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3362 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3363 { 0, 1, 2, ... vf - 1 } vector. */
3364 gcc_assert (nargs == 0);
3366 else if (modifier == NONE
3367 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3368 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3369 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3370 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3371 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3372 slp_op, vectype_in, cost_vec);
3373 else
3375 if (dump_enabled_p ())
3376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3377 "function is not vectorizable.\n");
3378 return false;
3382 if (slp_node)
3383 ncopies = 1;
3384 else if (modifier == NARROW && ifn == IFN_LAST)
3385 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3386 else
3387 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3389 /* Sanity check: make sure that at least one copy of the vectorized stmt
3390 needs to be generated. */
3391 gcc_assert (ncopies >= 1);
3393 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3394 if (!vec_stmt) /* transformation not required. */
3396 if (slp_node)
3397 for (i = 0; i < nargs; ++i)
3398 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3400 if (dump_enabled_p ())
3401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3402 "incompatible vector types for invariants\n");
3403 return false;
3405 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3406 DUMP_VECT_SCOPE ("vectorizable_call");
3407 vect_model_simple_cost (vinfo, stmt_info,
3408 ncopies, dt, ndts, slp_node, cost_vec);
3409 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3410 record_stmt_cost (cost_vec, ncopies / 2,
3411 vec_promote_demote, stmt_info, 0, vect_body);
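   /* For a call with a mask argument in a loop that may be fully masked,
      record now how many loop masks will be needed, keyed on the scalar
      mask argument.  */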
3413 if (loop_vinfo && mask_opno >= 0)
3415 unsigned int nvectors = (slp_node
3416 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3417 : ncopies);
3418 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3419 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3420 vectype_out, scalar_mask);
3422 return true;
3425 /* Transform. */
3427 if (dump_enabled_p ())
3428 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3430 /* Handle def. */
3431 scalar_dest = gimple_call_lhs (stmt);
3432 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3434 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
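   /* The first branch below handles same-width calls and internal-function
      narrowing (where pairs of half-width results are combined); the later
      NARROW branch handles narrowing via a target builtin, which instead
      takes twice as many arguments per call.  */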
3436 if (modifier == NONE || ifn != IFN_LAST)
3438 tree prev_res = NULL_TREE;
3439 vargs.safe_grow (nargs, true);
3440 orig_vargs.safe_grow (nargs, true);
3441 auto_vec<vec<tree> > vec_defs (nargs);
3442 for (j = 0; j < ncopies; ++j)
3444 /* Build argument list for the vectorized call. */
3445 if (slp_node)
3447 vec<tree> vec_oprnds0;
3449 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3450 vec_oprnds0 = vec_defs[0];
3452 /* Arguments are ready. Create the new vector stmt. */
3453 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3455 size_t k;
3456 for (k = 0; k < nargs; k++)
3458 vec<tree> vec_oprndsk = vec_defs[k];
3459 vargs[k] = vec_oprndsk[i];
3461 gimple *new_stmt;
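               /* For a narrowing internal function, every two half-width
                  results are combined with CONVERT_CODE into one full
                  vector, so a statement is pushed only on odd iterations.  */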
3462 if (modifier == NARROW)
3464 /* We don't define any narrowing conditional functions
3465 at present. */
3466 gcc_assert (mask_opno < 0);
3467 tree half_res = make_ssa_name (vectype_in);
3468 gcall *call
3469 = gimple_build_call_internal_vec (ifn, vargs);
3470 gimple_call_set_lhs (call, half_res);
3471 gimple_call_set_nothrow (call, true);
3472 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3473 if ((i & 1) == 0)
3475 prev_res = half_res;
3476 continue;
3478 new_temp = make_ssa_name (vec_dest);
3479 new_stmt = gimple_build_assign (new_temp, convert_code,
3480 prev_res, half_res);
3481 vect_finish_stmt_generation (vinfo, stmt_info,
3482 new_stmt, gsi);
3484 else
3486 if (mask_opno >= 0 && masked_loop_p)
3488 unsigned int vec_num = vec_oprnds0.length ();
3489 /* Always true for SLP. */
3490 gcc_assert (ncopies == 1);
3491 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3492 vectype_out, i);
3493 vargs[mask_opno] = prepare_load_store_mask
3494 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3497 gcall *call;
3498 if (ifn != IFN_LAST)
3499 call = gimple_build_call_internal_vec (ifn, vargs);
3500 else
3501 call = gimple_build_call_vec (fndecl, vargs);
3502 new_temp = make_ssa_name (vec_dest, call);
3503 gimple_call_set_lhs (call, new_temp);
3504 gimple_call_set_nothrow (call, true);
3505 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3506 new_stmt = call;
3508 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3510 continue;
3513 for (i = 0; i < nargs; i++)
3515 op = gimple_call_arg (stmt, i);
3516 if (j == 0)
3518 vec_defs.quick_push (vNULL);
3519 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3520 op, &vec_defs[i],
3521 vectypes[i]);
3523 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3526 if (mask_opno >= 0 && masked_loop_p)
3528 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3529 vectype_out, j);
3530 vargs[mask_opno]
3531 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3532 vargs[mask_opno], gsi);
3535 gimple *new_stmt;
3536 if (cfn == CFN_GOMP_SIMD_LANE)
3538 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3539 tree new_var
3540 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3541 gimple *init_stmt = gimple_build_assign (new_var, cst);
3542 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3543 new_temp = make_ssa_name (vec_dest);
3544 new_stmt = gimple_build_assign (new_temp, new_var);
3545 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3547 else if (modifier == NARROW)
3549 /* We don't define any narrowing conditional functions at
3550 present. */
3551 gcc_assert (mask_opno < 0);
3552 tree half_res = make_ssa_name (vectype_in);
3553 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3554 gimple_call_set_lhs (call, half_res);
3555 gimple_call_set_nothrow (call, true);
3556 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3557 if ((j & 1) == 0)
3559 prev_res = half_res;
3560 continue;
3562 new_temp = make_ssa_name (vec_dest);
3563 new_stmt = gimple_build_assign (new_temp, convert_code,
3564 prev_res, half_res);
3565 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3567 else
3569 gcall *call;
3570 if (ifn != IFN_LAST)
3571 call = gimple_build_call_internal_vec (ifn, vargs);
3572 else
3573 call = gimple_build_call_vec (fndecl, vargs);
3574 new_temp = make_ssa_name (vec_dest, call);
3575 gimple_call_set_lhs (call, new_temp);
3576 gimple_call_set_nothrow (call, true);
3577 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3578 new_stmt = call;
3581 if (j == (modifier == NARROW ? 1 : 0))
3582 *vec_stmt = new_stmt;
3583 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3585 for (i = 0; i < nargs; i++)
3587 vec<tree> vec_oprndsi = vec_defs[i];
3588 vec_oprndsi.release ();
3591 else if (modifier == NARROW)
3593 auto_vec<vec<tree> > vec_defs (nargs);
3594 /* We don't define any narrowing conditional functions at present. */
3595 gcc_assert (mask_opno < 0);
3596 for (j = 0; j < ncopies; ++j)
3598 /* Build argument list for the vectorized call. */
3599 if (j == 0)
3600 vargs.create (nargs * 2);
3601 else
3602 vargs.truncate (0);
3604 if (slp_node)
3606 vec<tree> vec_oprnds0;
3608 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3609 vec_oprnds0 = vec_defs[0];
3611 /* Arguments are ready. Create the new vector stmt. */
3612 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3614 size_t k;
3615 vargs.truncate (0);
3616 for (k = 0; k < nargs; k++)
3618 vec<tree> vec_oprndsk = vec_defs[k];
3619 vargs.quick_push (vec_oprndsk[i]);
3620 vargs.quick_push (vec_oprndsk[i + 1]);
3622 gcall *call;
3623 if (ifn != IFN_LAST)
3624 call = gimple_build_call_internal_vec (ifn, vargs);
3625 else
3626 call = gimple_build_call_vec (fndecl, vargs);
3627 new_temp = make_ssa_name (vec_dest, call);
3628 gimple_call_set_lhs (call, new_temp);
3629 gimple_call_set_nothrow (call, true);
3630 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3631 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3633 continue;
3636 for (i = 0; i < nargs; i++)
3638 op = gimple_call_arg (stmt, i);
3639 if (j == 0)
3641 vec_defs.quick_push (vNULL);
3642 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3643 op, &vec_defs[i], vectypes[i]);
3645 vec_oprnd0 = vec_defs[i][2*j];
3646 vec_oprnd1 = vec_defs[i][2*j+1];
3648 vargs.quick_push (vec_oprnd0);
3649 vargs.quick_push (vec_oprnd1);
3652 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3653 new_temp = make_ssa_name (vec_dest, new_stmt);
3654 gimple_call_set_lhs (new_stmt, new_temp);
3655 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3657 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3660 if (!slp_node)
3661 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3663 for (i = 0; i < nargs; i++)
3665 vec<tree> vec_oprndsi = vec_defs[i];
3666 vec_oprndsi.release ();
3669 else
3670 /* No current target implements this case. */
3671 return false;
3673 vargs.release ();
3675 /* The call in STMT might prevent it from being removed in DCE.
3676 We cannot remove it here, however, because of the way the SSA name
3677 it defines is mapped to the new definition. So just replace the
3678 rhs of the statement with something harmless. */
3680 if (slp_node)
3681 return true;
3683 stmt_info = vect_orig_stmt (stmt_info);
3684 lhs = gimple_get_lhs (stmt_info->stmt);
3686 gassign *new_stmt
3687 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3688 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3690 return true;
3694 struct simd_call_arg_info
3696 tree vectype;
3697 tree op;
3698 HOST_WIDE_INT linear_step;
3699 enum vect_def_type dt;
3700 unsigned int align;
3701 bool simd_lane_linear;
3704 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3705 is linear within a simd lane (but not within the whole loop), note it in
3706 *ARGINFO. */
3708 static void
3709 vect_simd_lane_linear (tree op, class loop *loop,
3710 struct simd_call_arg_info *arginfo)
3712 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3714 if (!is_gimple_assign (def_stmt)
3715 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3716 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3717 return;
3719 tree base = gimple_assign_rhs1 (def_stmt);
3720 HOST_WIDE_INT linear_step = 0;
3721 tree v = gimple_assign_rhs2 (def_stmt);
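   /* Walk the definition chain of V, folding constant pointer adjustments
      into BASE and allowing at most one constant multiplication to act as
      the linear step, until we reach the IFN_GOMP_SIMD_LANE call that makes
      the address linear within a simd lane.  */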
3722 while (TREE_CODE (v) == SSA_NAME)
3724 tree t;
3725 def_stmt = SSA_NAME_DEF_STMT (v);
3726 if (is_gimple_assign (def_stmt))
3727 switch (gimple_assign_rhs_code (def_stmt))
3729 case PLUS_EXPR:
3730 t = gimple_assign_rhs2 (def_stmt);
3731 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3732 return;
3733 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3734 v = gimple_assign_rhs1 (def_stmt);
3735 continue;
3736 case MULT_EXPR:
3737 t = gimple_assign_rhs2 (def_stmt);
3738 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3739 return;
3740 linear_step = tree_to_shwi (t);
3741 v = gimple_assign_rhs1 (def_stmt);
3742 continue;
3743 CASE_CONVERT:
3744 t = gimple_assign_rhs1 (def_stmt);
3745 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3746 || (TYPE_PRECISION (TREE_TYPE (v))
3747 < TYPE_PRECISION (TREE_TYPE (t))))
3748 return;
3749 if (!linear_step)
3750 linear_step = 1;
3751 v = t;
3752 continue;
3753 default:
3754 return;
3756 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3757 && loop->simduid
3758 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3759 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3760 == loop->simduid))
3762 if (!linear_step)
3763 linear_step = 1;
3764 arginfo->linear_step = linear_step;
3765 arginfo->op = base;
3766 arginfo->simd_lane_linear = true;
3767 return;
3772 /* Return the number of elements in vector type VECTYPE, which is associated
3773 with a SIMD clone. At present these vectors always have a constant
3774 length. */
3776 static unsigned HOST_WIDE_INT
3777 simd_clone_subparts (tree vectype)
3779 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3782 /* Function vectorizable_simd_clone_call.
3784 Check if STMT_INFO performs a function call that can be vectorized
3785 by calling a simd clone of the function.
3786 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3787 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3788 Return true if STMT_INFO is vectorizable in this way. */
3790 static bool
3791 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3792 gimple_stmt_iterator *gsi,
3793 gimple **vec_stmt, slp_tree slp_node,
3794 stmt_vector_for_cost *)
3796 tree vec_dest;
3797 tree scalar_dest;
3798 tree op, type;
3799 tree vec_oprnd0 = NULL_TREE;
3800 tree vectype;
3801 poly_uint64 nunits;
3802 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3803 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3804 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3805 tree fndecl, new_temp;
3806 int ncopies, j;
3807 auto_vec<simd_call_arg_info> arginfo;
3808 vec<tree> vargs = vNULL;
3809 size_t i, nargs;
3810 tree lhs, rtype, ratype;
3811 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3813 /* Is STMT a vectorizable call? */
3814 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3815 if (!stmt)
3816 return false;
3818 fndecl = gimple_call_fndecl (stmt);
3819 if (fndecl == NULL_TREE)
3820 return false;
3822 struct cgraph_node *node = cgraph_node::get (fndecl);
3823 if (node == NULL || node->simd_clones == NULL)
3824 return false;
3826 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3827 return false;
3829 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3830 && ! vec_stmt)
3831 return false;
3833 if (gimple_call_lhs (stmt)
3834 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3835 return false;
3837 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3839 vectype = STMT_VINFO_VECTYPE (stmt_info);
3841 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3842 return false;
3844 /* FORNOW */
3845 if (slp_node)
3846 return false;
3848 /* Process function arguments. */
3849 nargs = gimple_call_num_args (stmt);
3851 /* Bail out if the function has zero arguments. */
3852 if (nargs == 0)
3853 return false;
3855 arginfo.reserve (nargs, true);
3857 for (i = 0; i < nargs; i++)
3859 simd_call_arg_info thisarginfo;
3860 affine_iv iv;
3862 thisarginfo.linear_step = 0;
3863 thisarginfo.align = 0;
3864 thisarginfo.op = NULL_TREE;
3865 thisarginfo.simd_lane_linear = false;
3867 op = gimple_call_arg (stmt, i);
3868 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3869 &thisarginfo.vectype)
3870 || thisarginfo.dt == vect_uninitialized_def)
3872 if (dump_enabled_p ())
3873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3874 "use not simple.\n");
3875 return false;
3878 if (thisarginfo.dt == vect_constant_def
3879 || thisarginfo.dt == vect_external_def)
3880 gcc_assert (thisarginfo.vectype == NULL_TREE);
3881 else
3883 gcc_assert (thisarginfo.vectype != NULL_TREE);
3884 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3886 if (dump_enabled_p ())
3887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3888 "vector mask arguments are not supported\n");
3889 return false;
3893 /* For linear arguments, the analyze phase should have saved
3894 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3895 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3896 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3898 gcc_assert (vec_stmt);
3899 thisarginfo.linear_step
3900 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3901 thisarginfo.op
3902 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3903 thisarginfo.simd_lane_linear
3904 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3905 == boolean_true_node);
3906 /* If the loop has been peeled for alignment, we need to adjust it. */
3907 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3908 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3909 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3911 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3912 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3913 tree opt = TREE_TYPE (thisarginfo.op);
3914 bias = fold_convert (TREE_TYPE (step), bias);
3915 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3916 thisarginfo.op
3917 = fold_build2 (POINTER_TYPE_P (opt)
3918 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3919 thisarginfo.op, bias);
3922 else if (!vec_stmt
3923 && thisarginfo.dt != vect_constant_def
3924 && thisarginfo.dt != vect_external_def
3925 && loop_vinfo
3926 && TREE_CODE (op) == SSA_NAME
3927 && simple_iv (loop, loop_containing_stmt (stmt), op,
3928 &iv, false)
3929 && tree_fits_shwi_p (iv.step))
3931 thisarginfo.linear_step = tree_to_shwi (iv.step);
3932 thisarginfo.op = iv.base;
3934 else if ((thisarginfo.dt == vect_constant_def
3935 || thisarginfo.dt == vect_external_def)
3936 && POINTER_TYPE_P (TREE_TYPE (op)))
3937 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3938 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3939 linear too. */
3940 if (POINTER_TYPE_P (TREE_TYPE (op))
3941 && !thisarginfo.linear_step
3942 && !vec_stmt
3943 && thisarginfo.dt != vect_constant_def
3944 && thisarginfo.dt != vect_external_def
3945 && loop_vinfo
3946 && !slp_node
3947 && TREE_CODE (op) == SSA_NAME)
3948 vect_simd_lane_linear (op, loop, &thisarginfo);
3950 arginfo.quick_push (thisarginfo);
3953 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3954 if (!vf.is_constant ())
3956 if (dump_enabled_p ())
3957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3958 "not considering SIMD clones; not yet supported"
3959 " for variable-width vectors.\n");
3960 return false;
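   /* Choose the most suitable simd clone: penalize clones that need several
      calls to cover the vectorization factor and inbranch clones, reject
      clones whose argument kinds or linear steps do not match, prefer clones
      whose alignment requirement is closest to the known alignment, and let
      the target veto or rank clones via targetm.simd_clone.usable.  */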
3963 unsigned int badness = 0;
3964 struct cgraph_node *bestn = NULL;
3965 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3966 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3967 else
3968 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3969 n = n->simdclone->next_clone)
3971 unsigned int this_badness = 0;
3972 unsigned int num_calls;
3973 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
3974 || n->simdclone->nargs != nargs)
3975 continue;
3976 if (num_calls != 1)
3977 this_badness += exact_log2 (num_calls) * 4096;
3978 if (n->simdclone->inbranch)
3979 this_badness += 8192;
3980 int target_badness = targetm.simd_clone.usable (n);
3981 if (target_badness < 0)
3982 continue;
3983 this_badness += target_badness * 512;
3984 /* FORNOW: Have to add code to add the mask argument. */
3985 if (n->simdclone->inbranch)
3986 continue;
3987 for (i = 0; i < nargs; i++)
3989 switch (n->simdclone->args[i].arg_type)
3991 case SIMD_CLONE_ARG_TYPE_VECTOR:
3992 if (!useless_type_conversion_p
3993 (n->simdclone->args[i].orig_type,
3994 TREE_TYPE (gimple_call_arg (stmt, i))))
3995 i = -1;
3996 else if (arginfo[i].dt == vect_constant_def
3997 || arginfo[i].dt == vect_external_def
3998 || arginfo[i].linear_step)
3999 this_badness += 64;
4000 break;
4001 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4002 if (arginfo[i].dt != vect_constant_def
4003 && arginfo[i].dt != vect_external_def)
4004 i = -1;
4005 break;
4006 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4007 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4008 if (arginfo[i].dt == vect_constant_def
4009 || arginfo[i].dt == vect_external_def
4010 || (arginfo[i].linear_step
4011 != n->simdclone->args[i].linear_step))
4012 i = -1;
4013 break;
4014 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4015 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4016 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4017 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4018 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4019 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4020 /* FORNOW */
4021 i = -1;
4022 break;
4023 case SIMD_CLONE_ARG_TYPE_MASK:
4024 gcc_unreachable ();
4026 if (i == (size_t) -1)
4027 break;
4028 if (n->simdclone->args[i].alignment > arginfo[i].align)
4030 i = -1;
4031 break;
4033 if (arginfo[i].align)
4034 this_badness += (exact_log2 (arginfo[i].align)
4035 - exact_log2 (n->simdclone->args[i].alignment));
4037 if (i == (size_t) -1)
4038 continue;
4039 if (bestn == NULL || this_badness < badness)
4041 bestn = n;
4042 badness = this_badness;
4046 if (bestn == NULL)
4047 return false;
4049 for (i = 0; i < nargs; i++)
4050 if ((arginfo[i].dt == vect_constant_def
4051 || arginfo[i].dt == vect_external_def)
4052 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4054 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4055 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4056 slp_node);
4057 if (arginfo[i].vectype == NULL
4058 || !constant_multiple_p (bestn->simdclone->simdlen,
4059 simd_clone_subparts (arginfo[i].vectype)))
4060 return false;
4063 fndecl = bestn->decl;
4064 nunits = bestn->simdclone->simdlen;
4065 ncopies = vector_unroll_factor (vf, nunits);
4067 /* If the function isn't const, only allow it in simd loops where the user
4068 has asserted that at least nunits consecutive iterations can be
4069 performed using SIMD instructions. */
4070 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4071 && gimple_vuse (stmt))
4072 return false;
4074 /* Sanity check: make sure that at least one copy of the vectorized stmt
4075 needs to be generated. */
4076 gcc_assert (ncopies >= 1);
4078 if (!vec_stmt) /* transformation not required. */
4080 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4081 for (i = 0; i < nargs; i++)
4082 if ((bestn->simdclone->args[i].arg_type
4083 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4084 || (bestn->simdclone->args[i].arg_type
4085 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4087 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4088 + 1,
4089 true);
4090 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4091 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4092 ? size_type_node : TREE_TYPE (arginfo[i].op);
4093 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4094 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4095 tree sll = arginfo[i].simd_lane_linear
4096 ? boolean_true_node : boolean_false_node;
4097 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4099 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4100 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4101 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4102 dt, slp_node, cost_vec); */
4103 return true;
4106 /* Transform. */
4108 if (dump_enabled_p ())
4109 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4111 /* Handle def. */
4112 scalar_dest = gimple_call_lhs (stmt);
4113 vec_dest = NULL_TREE;
4114 rtype = NULL_TREE;
4115 ratype = NULL_TREE;
4116 if (scalar_dest)
4118 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4119 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4120 if (TREE_CODE (rtype) == ARRAY_TYPE)
4122 ratype = rtype;
4123 rtype = TREE_TYPE (ratype);
4127 auto_vec<vec<tree> > vec_oprnds;
4128 auto_vec<unsigned> vec_oprnds_i;
4129 vec_oprnds.safe_grow_cleared (nargs, true);
4130 vec_oprnds_i.safe_grow_cleared (nargs, true);
4131 for (j = 0; j < ncopies; ++j)
4133 /* Build argument list for the vectorized call. */
4134 if (j == 0)
4135 vargs.create (nargs);
4136 else
4137 vargs.truncate (0);
4139 for (i = 0; i < nargs; i++)
4141 unsigned int k, l, m, o;
4142 tree atype;
4143 op = gimple_call_arg (stmt, i);
4144 switch (bestn->simdclone->args[i].arg_type)
4146 case SIMD_CLONE_ARG_TYPE_VECTOR:
4147 atype = bestn->simdclone->args[i].vector_type;
4148 o = vector_unroll_factor (nunits,
4149 simd_clone_subparts (atype));
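                 /* If the clone expects narrower vectors than the loop
                    vectype, extract the pieces with BIT_FIELD_REFs;
                    otherwise glue several loop vectors together with a
                    CONSTRUCTOR to build each argument.  */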
4150 for (m = j * o; m < (j + 1) * o; m++)
4152 if (simd_clone_subparts (atype)
4153 < simd_clone_subparts (arginfo[i].vectype))
4155 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4156 k = (simd_clone_subparts (arginfo[i].vectype)
4157 / simd_clone_subparts (atype));
4158 gcc_assert ((k & (k - 1)) == 0);
4159 if (m == 0)
4161 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4162 ncopies * o / k, op,
4163 &vec_oprnds[i]);
4164 vec_oprnds_i[i] = 0;
4165 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4167 else
4169 vec_oprnd0 = arginfo[i].op;
4170 if ((m & (k - 1)) == 0)
4171 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4173 arginfo[i].op = vec_oprnd0;
4174 vec_oprnd0
4175 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4176 bitsize_int (prec),
4177 bitsize_int ((m & (k - 1)) * prec));
4178 gassign *new_stmt
4179 = gimple_build_assign (make_ssa_name (atype),
4180 vec_oprnd0);
4181 vect_finish_stmt_generation (vinfo, stmt_info,
4182 new_stmt, gsi);
4183 vargs.safe_push (gimple_assign_lhs (new_stmt));
4185 else
4187 k = (simd_clone_subparts (atype)
4188 / simd_clone_subparts (arginfo[i].vectype));
4189 gcc_assert ((k & (k - 1)) == 0);
4190 vec<constructor_elt, va_gc> *ctor_elts;
4191 if (k != 1)
4192 vec_alloc (ctor_elts, k);
4193 else
4194 ctor_elts = NULL;
4195 for (l = 0; l < k; l++)
4197 if (m == 0 && l == 0)
4199 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4200 k * o * ncopies,
4202 &vec_oprnds[i]);
4203 vec_oprnds_i[i] = 0;
4204 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4206 else
4207 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4208 arginfo[i].op = vec_oprnd0;
4209 if (k == 1)
4210 break;
4211 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4212 vec_oprnd0);
4214 if (k == 1)
4215 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4216 atype))
4218 vec_oprnd0
4219 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4220 gassign *new_stmt
4221 = gimple_build_assign (make_ssa_name (atype),
4222 vec_oprnd0);
4223 vect_finish_stmt_generation (vinfo, stmt_info,
4224 new_stmt, gsi);
4225 vargs.safe_push (gimple_assign_lhs (new_stmt));
4227 else
4228 vargs.safe_push (vec_oprnd0);
4229 else
4231 vec_oprnd0 = build_constructor (atype, ctor_elts);
4232 gassign *new_stmt
4233 = gimple_build_assign (make_ssa_name (atype),
4234 vec_oprnd0);
4235 vect_finish_stmt_generation (vinfo, stmt_info,
4236 new_stmt, gsi);
4237 vargs.safe_push (gimple_assign_lhs (new_stmt));
4241 break;
4242 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4243 vargs.safe_push (op);
4244 break;
4245 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4246 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4247 if (j == 0)
4249 gimple_seq stmts;
4250 arginfo[i].op
4251 = force_gimple_operand (unshare_expr (arginfo[i].op),
4252 &stmts, true, NULL_TREE);
4253 if (stmts != NULL)
4255 basic_block new_bb;
4256 edge pe = loop_preheader_edge (loop);
4257 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4258 gcc_assert (!new_bb);
4260 if (arginfo[i].simd_lane_linear)
4262 vargs.safe_push (arginfo[i].op);
4263 break;
4265 tree phi_res = copy_ssa_name (op);
4266 gphi *new_phi = create_phi_node (phi_res, loop->header);
4267 add_phi_arg (new_phi, arginfo[i].op,
4268 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4269 enum tree_code code
4270 = POINTER_TYPE_P (TREE_TYPE (op))
4271 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4272 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4273 ? sizetype : TREE_TYPE (op);
4274 poly_widest_int cst
4275 = wi::mul (bestn->simdclone->args[i].linear_step,
4276 ncopies * nunits);
4277 tree tcst = wide_int_to_tree (type, cst);
4278 tree phi_arg = copy_ssa_name (op);
4279 gassign *new_stmt
4280 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4281 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4282 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4283 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4284 UNKNOWN_LOCATION);
4285 arginfo[i].op = phi_res;
4286 vargs.safe_push (phi_res);
4288 else
4290 enum tree_code code
4291 = POINTER_TYPE_P (TREE_TYPE (op))
4292 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4293 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4294 ? sizetype : TREE_TYPE (op);
4295 poly_widest_int cst
4296 = wi::mul (bestn->simdclone->args[i].linear_step,
4297 j * nunits);
4298 tree tcst = wide_int_to_tree (type, cst);
4299 new_temp = make_ssa_name (TREE_TYPE (op));
4300 gassign *new_stmt
4301 = gimple_build_assign (new_temp, code,
4302 arginfo[i].op, tcst);
4303 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4304 vargs.safe_push (new_temp);
4306 break;
4307 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4308 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4309 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4310 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4311 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4312 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4313 default:
4314 gcc_unreachable ();
4318 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4319 if (vec_dest)
4321 gcc_assert (ratype
4322 || known_eq (simd_clone_subparts (rtype), nunits));
4323 if (ratype)
4324 new_temp = create_tmp_var (ratype);
4325 else if (useless_type_conversion_p (vectype, rtype))
4326 new_temp = make_ssa_name (vec_dest, new_call);
4327 else
4328 new_temp = make_ssa_name (rtype, new_call);
4329 gimple_call_set_lhs (new_call, new_temp);
4331 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4332 gimple *new_stmt = new_call;
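       /* Massage the clone's return value back into loop vectors: split a
          wider (or array) return value with BIT_FIELD_REFs or MEM_REFs, or
          collect several narrower return values in a CONSTRUCTOR until a
          full vector has been produced.  */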
4334 if (vec_dest)
4336 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4338 unsigned int k, l;
4339 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4340 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4341 k = vector_unroll_factor (nunits,
4342 simd_clone_subparts (vectype));
4343 gcc_assert ((k & (k - 1)) == 0);
4344 for (l = 0; l < k; l++)
4346 tree t;
4347 if (ratype)
4349 t = build_fold_addr_expr (new_temp);
4350 t = build2 (MEM_REF, vectype, t,
4351 build_int_cst (TREE_TYPE (t), l * bytes));
4353 else
4354 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4355 bitsize_int (prec), bitsize_int (l * prec));
4356 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4357 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4359 if (j == 0 && l == 0)
4360 *vec_stmt = new_stmt;
4361 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4364 if (ratype)
4365 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4366 continue;
4368 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4370 unsigned int k = (simd_clone_subparts (vectype)
4371 / simd_clone_subparts (rtype));
4372 gcc_assert ((k & (k - 1)) == 0);
4373 if ((j & (k - 1)) == 0)
4374 vec_alloc (ret_ctor_elts, k);
4375 if (ratype)
4377 unsigned int m, o;
4378 o = vector_unroll_factor (nunits,
4379 simd_clone_subparts (rtype));
4380 for (m = 0; m < o; m++)
4382 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4383 size_int (m), NULL_TREE, NULL_TREE);
4384 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4385 tem);
4386 vect_finish_stmt_generation (vinfo, stmt_info,
4387 new_stmt, gsi);
4388 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4389 gimple_assign_lhs (new_stmt));
4391 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4393 else
4394 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4395 if ((j & (k - 1)) != k - 1)
4396 continue;
4397 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4398 new_stmt
4399 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4400 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4402 if ((unsigned) j == k - 1)
4403 *vec_stmt = new_stmt;
4404 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4405 continue;
4407 else if (ratype)
4409 tree t = build_fold_addr_expr (new_temp);
4410 t = build2 (MEM_REF, vectype, t,
4411 build_int_cst (TREE_TYPE (t), 0));
4412 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4413 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4414 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4416 else if (!useless_type_conversion_p (vectype, rtype))
4418 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4419 new_stmt
4420 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4421 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4425 if (j == 0)
4426 *vec_stmt = new_stmt;
4427 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4430 for (i = 0; i < nargs; ++i)
4432 vec<tree> oprndsi = vec_oprnds[i];
4433 oprndsi.release ();
4435 vargs.release ();
4437 /* The call in STMT might prevent it from being removed in DCE.
4438 We cannot remove it here, however, because of the way the SSA name
4439 it defines is mapped to the new definition. So just replace the
4440 rhs of the statement with something harmless. */
4442 if (slp_node)
4443 return true;
4445 gimple *new_stmt;
4446 if (scalar_dest)
4448 type = TREE_TYPE (scalar_dest);
4449 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4450 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4452 else
4453 new_stmt = gimple_build_nop ();
4454 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4455 unlink_stmt_vdef (stmt);
4457 return true;
4461 /* Function vect_gen_widened_results_half
4463 Create a vector stmt whose code, number of operands and result
4464 variable are CODE, OP_TYPE and VEC_DEST, and whose operands are
4465 VEC_OPRND0 and (for a binary operation) VEC_OPRND1. The new vector
4466 stmt is to be inserted at GSI.
4468 STMT_INFO is the original scalar stmt that we are vectorizing. */
4470 static gimple *
4471 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4472 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4473 tree vec_dest, gimple_stmt_iterator *gsi,
4474 stmt_vec_info stmt_info)
4476 gimple *new_stmt;
4477 tree new_temp;
4479 /* Generate half of the widened result: */
4480 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4481 if (op_type != binary_op)
4482 vec_oprnd1 = NULL;
4483 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4484 new_temp = make_ssa_name (vec_dest, new_stmt);
4485 gimple_assign_set_lhs (new_stmt, new_temp);
4486 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4488 return new_stmt;
4492 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4493 For multi-step conversions store the resulting vectors and call the function
4494 recursively. */
4496 static void
4497 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4498 int multi_step_cvt,
4499 stmt_vec_info stmt_info,
4500 vec<tree> &vec_dsts,
4501 gimple_stmt_iterator *gsi,
4502 slp_tree slp_node, enum tree_code code)
4504 unsigned int i;
4505 tree vop0, vop1, new_tmp, vec_dest;
4507 vec_dest = vec_dsts.pop ();
4509 for (i = 0; i < vec_oprnds->length (); i += 2)
4511 /* Create demotion operation. */
4512 vop0 = (*vec_oprnds)[i];
4513 vop1 = (*vec_oprnds)[i + 1];
4514 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4515 new_tmp = make_ssa_name (vec_dest, new_stmt);
4516 gimple_assign_set_lhs (new_stmt, new_tmp);
4517 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4519 if (multi_step_cvt)
4520 /* Store the resulting vector for next recursive call. */
4521 (*vec_oprnds)[i/2] = new_tmp;
4522 else
4524 /* This is the last step of the conversion sequence. Store the
4525 vectors in SLP_NODE or in the vector info of the scalar statement
4526 (or in the STMT_VINFO_RELATED_STMT chain). */
4527 if (slp_node)
4528 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4529 else
4530 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4534 /* For multi-step demotion operations we first generate demotion operations
4535 from the source type to the intermediate types, and then combine the
4536 results (stored in VEC_OPRNDS) with a demotion operation to the destination
4537 type. */
4538 if (multi_step_cvt)
4540 /* At each level of recursion we have half of the operands we had at the
4541 previous level. */
4542 vec_oprnds->truncate ((i+1)/2);
4543 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4544 multi_step_cvt - 1,
4545 stmt_info, vec_dsts, gsi,
4546 slp_node, VEC_PACK_TRUNC_EXPR);
4549 vec_dsts.quick_push (vec_dest);
4553 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4554 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4555 STMT_INFO. For multi-step conversions store the resulting vectors and
4556 call the function recursively. */
4558 static void
4559 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4560 vec<tree> *vec_oprnds0,
4561 vec<tree> *vec_oprnds1,
4562 stmt_vec_info stmt_info, tree vec_dest,
4563 gimple_stmt_iterator *gsi,
4564 enum tree_code code1,
4565 enum tree_code code2, int op_type)
4567 int i;
4568 tree vop0, vop1, new_tmp1, new_tmp2;
4569 gimple *new_stmt1, *new_stmt2;
4570 vec<tree> vec_tmp = vNULL;
4572 vec_tmp.create (vec_oprnds0->length () * 2);
4573 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4575 if (op_type == binary_op)
4576 vop1 = (*vec_oprnds1)[i];
4577 else
4578 vop1 = NULL_TREE;
4580 /* Generate the two halves of promotion operation. */
4581 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4582 op_type, vec_dest, gsi,
4583 stmt_info);
4584 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4585 op_type, vec_dest, gsi,
4586 stmt_info);
4587 if (is_gimple_call (new_stmt1))
4589 new_tmp1 = gimple_call_lhs (new_stmt1);
4590 new_tmp2 = gimple_call_lhs (new_stmt2);
4592 else
4594 new_tmp1 = gimple_assign_lhs (new_stmt1);
4595 new_tmp2 = gimple_assign_lhs (new_stmt2);
4598 /* Store the results for the next step. */
4599 vec_tmp.quick_push (new_tmp1);
4600 vec_tmp.quick_push (new_tmp2);
4603 vec_oprnds0->release ();
4604 *vec_oprnds0 = vec_tmp;
4607 /* Create vectorized promotion stmts for widening stmts using only half the
4608 potential vector size for input. */
4609 static void
4610 vect_create_half_widening_stmts (vec_info *vinfo,
4611 vec<tree> *vec_oprnds0,
4612 vec<tree> *vec_oprnds1,
4613 stmt_vec_info stmt_info, tree vec_dest,
4614 gimple_stmt_iterator *gsi,
4615 enum tree_code code1,
4616 int op_type)
4618 int i;
4619 tree vop0, vop1;
4620 gimple *new_stmt1;
4621 gimple *new_stmt2;
4622 gimple *new_stmt3;
4623 vec<tree> vec_tmp = vNULL;
4625 vec_tmp.create (vec_oprnds0->length ());
4626 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4628 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4630 gcc_assert (op_type == binary_op);
4631 vop1 = (*vec_oprnds1)[i];
4633 /* Widen the first vector input. */
4634 out_type = TREE_TYPE (vec_dest);
4635 new_tmp1 = make_ssa_name (out_type);
4636 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4637 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4638 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4640 /* Widen the second vector input. */
4641 new_tmp2 = make_ssa_name (out_type);
4642 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4643 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4644 /* Perform the operation with both vector inputs widened. */
4645 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4647 else
4649 /* Perform the operation with the single vector input widened. */
4650 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4653 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4654 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4655 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4657 /* Store the results for the next step. */
4658 vec_tmp.quick_push (new_tmp3);
4661 vec_oprnds0->release ();
4662 *vec_oprnds0 = vec_tmp;
4666 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4667 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4668 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4669 Return true if STMT_INFO is vectorizable in this way. */
4671 static bool
4672 vectorizable_conversion (vec_info *vinfo,
4673 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4674 gimple **vec_stmt, slp_tree slp_node,
4675 stmt_vector_for_cost *cost_vec)
4677 tree vec_dest;
4678 tree scalar_dest;
4679 tree op0, op1 = NULL_TREE;
4680 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4681 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4682 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4683 tree new_temp;
4684 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4685 int ndts = 2;
4686 poly_uint64 nunits_in;
4687 poly_uint64 nunits_out;
4688 tree vectype_out, vectype_in;
4689 int ncopies, i;
4690 tree lhs_type, rhs_type;
4691 enum { NARROW, NONE, WIDEN } modifier;
4692 vec<tree> vec_oprnds0 = vNULL;
4693 vec<tree> vec_oprnds1 = vNULL;
4694 tree vop0;
4695 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4696 int multi_step_cvt = 0;
4697 vec<tree> interm_types = vNULL;
4698 tree intermediate_type, cvt_type = NULL_TREE;
4699 int op_type;
4700 unsigned short fltsz;
4702 /* Is STMT a vectorizable conversion? */
4704 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4705 return false;
4707 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4708 && ! vec_stmt)
4709 return false;
4711 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4712 if (!stmt)
4713 return false;
4715 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4716 return false;
4718 code = gimple_assign_rhs_code (stmt);
4719 if (!CONVERT_EXPR_CODE_P (code)
4720 && code != FIX_TRUNC_EXPR
4721 && code != FLOAT_EXPR
4722 && code != WIDEN_PLUS_EXPR
4723 && code != WIDEN_MINUS_EXPR
4724 && code != WIDEN_MULT_EXPR
4725 && code != WIDEN_LSHIFT_EXPR)
4726 return false;
4728 bool widen_arith = (code == WIDEN_PLUS_EXPR
4729 || code == WIDEN_MINUS_EXPR
4730 || code == WIDEN_MULT_EXPR
4731 || code == WIDEN_LSHIFT_EXPR);
4732 op_type = TREE_CODE_LENGTH (code);
4734 /* Check types of lhs and rhs. */
4735 scalar_dest = gimple_assign_lhs (stmt);
4736 lhs_type = TREE_TYPE (scalar_dest);
4737 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4739 /* Check the operands of the operation. */
4740 slp_tree slp_op0, slp_op1 = NULL;
4741 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4742 0, &op0, &slp_op0, &dt[0], &vectype_in))
4744 if (dump_enabled_p ())
4745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4746 "use not simple.\n");
4747 return false;
4750 rhs_type = TREE_TYPE (op0);
4751 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4752 && !((INTEGRAL_TYPE_P (lhs_type)
4753 && INTEGRAL_TYPE_P (rhs_type))
4754 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4755 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4756 return false;
4758 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4759 && ((INTEGRAL_TYPE_P (lhs_type)
4760 && !type_has_mode_precision_p (lhs_type))
4761 || (INTEGRAL_TYPE_P (rhs_type)
4762 && !type_has_mode_precision_p (rhs_type))))
4764 if (dump_enabled_p ())
4765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4766 "type conversion to/from bit-precision unsupported."
4767 "\n");
4768 return false;
4771 if (op_type == binary_op)
4773 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4774 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4776 op1 = gimple_assign_rhs2 (stmt);
4777 tree vectype1_in;
4778 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4779 &op1, &slp_op1, &dt[1], &vectype1_in))
4781 if (dump_enabled_p ())
4782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4783 "use not simple.\n");
4784 return false;
4786 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4787 OP1. */
4788 if (!vectype_in)
4789 vectype_in = vectype1_in;
4792 /* If op0 is an external or constant def, infer the vector type
4793 from the scalar type. */
4794 if (!vectype_in)
4795 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4796 if (vec_stmt)
4797 gcc_assert (vectype_in);
4798 if (!vectype_in)
4800 if (dump_enabled_p ())
4801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4802 "no vectype for scalar type %T\n", rhs_type);
4804 return false;
4807 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4808 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4810 if (dump_enabled_p ())
4811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4812 "can't convert between boolean and non "
4813 "boolean vectors %T\n", rhs_type);
4815 return false;
4818 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4819 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4820 if (known_eq (nunits_out, nunits_in))
4821 if (widen_arith)
4822 modifier = WIDEN;
4823 else
4824 modifier = NONE;
4825 else if (multiple_p (nunits_out, nunits_in))
4826 modifier = NARROW;
4827 else
4829 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4830 modifier = WIDEN;
4833 /* Multiple types in SLP are handled by creating the appropriate number of
4834 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4835 case of SLP. */
4836 if (slp_node)
4837 ncopies = 1;
4838 else if (modifier == NARROW)
4839 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4840 else
4841 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4843 /* Sanity check: make sure that at least one copy of the vectorized stmt
4844 needs to be generated. */
4845 gcc_assert (ncopies >= 1);
4847 bool found_mode = false;
4848 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4849 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4850 opt_scalar_mode rhs_mode_iter;
4852 /* Supportable by target? */
4853 switch (modifier)
4855 case NONE:
4856 if (code != FIX_TRUNC_EXPR
4857 && code != FLOAT_EXPR
4858 && !CONVERT_EXPR_CODE_P (code))
4859 return false;
4860 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4861 break;
4862 /* FALLTHRU */
4863 unsupported:
4864 if (dump_enabled_p ())
4865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4866 "conversion not supported by target.\n");
4867 return false;
4869 case WIDEN:
4870 if (known_eq (nunits_in, nunits_out))
4872 if (!supportable_half_widening_operation (code, vectype_out,
4873 vectype_in, &code1))
4874 goto unsupported;
4875 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4876 break;
4878 if (supportable_widening_operation (vinfo, code, stmt_info,
4879 vectype_out, vectype_in, &code1,
4880 &code2, &multi_step_cvt,
4881 &interm_types))
4883 /* Binary widening operation can only be supported directly by the
4884 architecture. */
4885 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4886 break;
4889 if (code != FLOAT_EXPR
4890 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4891 goto unsupported;
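       /* Only a widening integer-to-float conversion is left: search for a
          2x-wider integer mode that can act as an intermediate type, so the
          integer input is widened first and then converted to the float
          result.  */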
4893 fltsz = GET_MODE_SIZE (lhs_mode);
4894 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4896 rhs_mode = rhs_mode_iter.require ();
4897 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4898 break;
4900 cvt_type
4901 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4902 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4903 if (cvt_type == NULL_TREE)
4904 goto unsupported;
4906 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4908 if (!supportable_convert_operation (code, vectype_out,
4909 cvt_type, &codecvt1))
4910 goto unsupported;
4912 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4913 vectype_out, cvt_type,
4914 &codecvt1, &codecvt2,
4915 &multi_step_cvt,
4916 &interm_types))
4917 continue;
4918 else
4919 gcc_assert (multi_step_cvt == 0);
4921 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4922 cvt_type,
4923 vectype_in, &code1, &code2,
4924 &multi_step_cvt, &interm_types))
4926 found_mode = true;
4927 break;
4931 if (!found_mode)
4932 goto unsupported;
4934 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4935 codecvt2 = ERROR_MARK;
4936 else
4938 multi_step_cvt++;
4939 interm_types.safe_push (cvt_type);
4940 cvt_type = NULL_TREE;
4942 break;
4944 case NARROW:
4945 gcc_assert (op_type == unary_op);
4946 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4947 &code1, &multi_step_cvt,
4948 &interm_types))
4949 break;
4951 if (code != FIX_TRUNC_EXPR
4952 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4953 goto unsupported;
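       /* Only float-to-integer narrowing is left: first truncate to an
          integer type of the same width as the float input and then narrow
          that integer vector to the final type.  */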
4955 cvt_type
4956 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4957 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4958 if (cvt_type == NULL_TREE)
4959 goto unsupported;
4960 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4961 &codecvt1))
4962 goto unsupported;
4963 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4964 &code1, &multi_step_cvt,
4965 &interm_types))
4966 break;
4967 goto unsupported;
4969 default:
4970 gcc_unreachable ();
4973 if (!vec_stmt) /* transformation not required. */
4975 if (slp_node
4976 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4977 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4979 if (dump_enabled_p ())
4980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4981 "incompatible vector types for invariants\n");
4982 return false;
4984 DUMP_VECT_SCOPE ("vectorizable_conversion");
4985 if (modifier == NONE)
4987 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4988 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4989 cost_vec);
4991 else if (modifier == NARROW)
4993 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4994 /* The final packing step produces one vector result per copy. */
4995 unsigned int nvectors
4996 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4997 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4998 multi_step_cvt, cost_vec,
4999 widen_arith);
5001 else
5003 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5004 /* The initial unpacking step produces two vector results
5005 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5006 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5007 unsigned int nvectors
5008 = (slp_node
5009 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5010 : ncopies * 2);
5011 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5012 multi_step_cvt, cost_vec,
5013 widen_arith);
5015 interm_types.release ();
5016 return true;
5019 /* Transform. */
5020 if (dump_enabled_p ())
5021 dump_printf_loc (MSG_NOTE, vect_location,
5022 "transform conversion. ncopies = %d.\n", ncopies);
5024 if (op_type == binary_op)
5026 if (CONSTANT_CLASS_P (op0))
5027 op0 = fold_convert (TREE_TYPE (op1), op0);
5028 else if (CONSTANT_CLASS_P (op1))
5029 op1 = fold_convert (TREE_TYPE (op0), op1);
5032 /* In case of multi-step conversion, we first generate conversion operations
5033 to the intermediate types, and then from those types to the final one.
5034 We create vector destinations for the intermediate type (TYPES) received
5035 from supportable_*_operation, and store them in the correct order
5036 for future use in vect_create_vectorized_*_stmts (). */
5037 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5038 vec_dest = vect_create_destination_var (scalar_dest,
5039 (cvt_type && modifier == WIDEN)
5040 ? cvt_type : vectype_out);
5041 vec_dsts.quick_push (vec_dest);
5043 if (multi_step_cvt)
5045 for (i = interm_types.length () - 1;
5046 interm_types.iterate (i, &intermediate_type); i--)
5048 vec_dest = vect_create_destination_var (scalar_dest,
5049 intermediate_type);
5050 vec_dsts.quick_push (vec_dest);
5054 if (cvt_type)
5055 vec_dest = vect_create_destination_var (scalar_dest,
5056 modifier == WIDEN
5057 ? vectype_out : cvt_type);
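   /* NINPUTS is how many input vector defs each copy consumes: a narrowing
      chain needs 2^(MULTI_STEP_CVT + 1) inputs to produce one output vector,
      while widening and simple conversions need just one.  */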
5059 int ninputs = 1;
5060 if (!slp_node)
5062 if (modifier == WIDEN)
5064 else if (modifier == NARROW)
5066 if (multi_step_cvt)
5067 ninputs = vect_pow2 (multi_step_cvt);
5068 ninputs *= 2;
5072 switch (modifier)
5074 case NONE:
5075 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5076 op0, &vec_oprnds0);
5077 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5079 /* Arguments are ready. Create the new vector stmt. */
5080 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5081 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5082 new_temp = make_ssa_name (vec_dest, new_stmt);
5083 gimple_assign_set_lhs (new_stmt, new_temp);
5084 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5086 if (slp_node)
5087 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5088 else
5089 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5091 break;
5093 case WIDEN:
5094 /* In case the vectorization factor (VF) is bigger than the number
5095 of elements that we can fit in a vectype (nunits), we have to
5096 generate more than one vector stmt, i.e. we need to "unroll"
5097 the vector stmt by a factor VF/nunits. */
5098 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5099 op0, &vec_oprnds0,
5100 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5101 &vec_oprnds1);
5102 if (code == WIDEN_LSHIFT_EXPR)
5104 int oprnds_size = vec_oprnds0.length ();
5105 vec_oprnds1.create (oprnds_size);
5106 for (i = 0; i < oprnds_size; ++i)
5107 vec_oprnds1.quick_push (op1);
5109 /* Arguments are ready. Create the new vector stmts. */
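       /* Widen step by step through the intermediate types towards the final
          type; on the last step substitute the float-conversion codes when
          that conversion itself widens (CODECVT2 set).  A remaining
          same-width float conversion (CVT_TYPE still set) is applied by the
          loop that follows.  */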
5110 for (i = multi_step_cvt; i >= 0; i--)
5112 tree this_dest = vec_dsts[i];
5113 enum tree_code c1 = code1, c2 = code2;
5114 if (i == 0 && codecvt2 != ERROR_MARK)
5116 c1 = codecvt1;
5117 c2 = codecvt2;
5119 if (known_eq (nunits_out, nunits_in))
5120 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5121 &vec_oprnds1, stmt_info,
5122 this_dest, gsi,
5123 c1, op_type);
5124 else
5125 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5126 &vec_oprnds1, stmt_info,
5127 this_dest, gsi,
5128 c1, c2, op_type);
5131 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5133 gimple *new_stmt;
5134 if (cvt_type)
5136 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5137 new_temp = make_ssa_name (vec_dest);
5138 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5139 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5141 else
5142 new_stmt = SSA_NAME_DEF_STMT (vop0);
5144 if (slp_node)
5145 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5146 else
5147 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5149 break;
5151 case NARROW:
5152 /* In case the vectorization factor (VF) is bigger than the number
5153 of elements that we can fit in a vectype (nunits), we have to
5154 generate more than one vector stmt, i.e. we need to "unroll"
5155 the vector stmt by a factor VF/nunits. */
5156 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5157 op0, &vec_oprnds0);
5158 /* Arguments are ready. Create the new vector stmts. */
5159 if (cvt_type)
5160 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5162 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5163 new_temp = make_ssa_name (vec_dest);
5164 gassign *new_stmt
5165 = gimple_build_assign (new_temp, codecvt1, vop0);
5166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5167 vec_oprnds0[i] = new_temp;
5170 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5171 multi_step_cvt,
5172 stmt_info, vec_dsts, gsi,
5173 slp_node, code1);
5174 break;
5176 if (!slp_node)
5177 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5179 vec_oprnds0.release ();
5180 vec_oprnds1.release ();
5181 interm_types.release ();
5183 return true;
5186 /* Return true if we can assume from the scalar form of STMT_INFO that
5187 neither the scalar nor the vector forms will generate code. STMT_INFO
5188 is known not to involve a data reference. */
5190 bool
5191 vect_nop_conversion_p (stmt_vec_info stmt_info)
5193 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5194 if (!stmt)
5195 return false;
5197 tree lhs = gimple_assign_lhs (stmt);
5198 tree_code code = gimple_assign_rhs_code (stmt);
5199 tree rhs = gimple_assign_rhs1 (stmt);
5201 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5202 return true;
5204 if (CONVERT_EXPR_CODE_P (code))
5205 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5207 return false;
5210 /* Function vectorizable_assignment.
5212 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5213 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5214 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5215 Return true if STMT_INFO is vectorizable in this way. */
5217 static bool
5218 vectorizable_assignment (vec_info *vinfo,
5219 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5220 gimple **vec_stmt, slp_tree slp_node,
5221 stmt_vector_for_cost *cost_vec)
5223 tree vec_dest;
5224 tree scalar_dest;
5225 tree op;
5226 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5227 tree new_temp;
5228 enum vect_def_type dt[1] = {vect_unknown_def_type};
5229 int ndts = 1;
5230 int ncopies;
5231 int i;
5232 vec<tree> vec_oprnds = vNULL;
5233 tree vop;
5234 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5235 enum tree_code code;
5236 tree vectype_in;
5238 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5239 return false;
5241 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5242 && ! vec_stmt)
5243 return false;
5245 /* Is vectorizable assignment? */
5246 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5247 if (!stmt)
5248 return false;
5250 scalar_dest = gimple_assign_lhs (stmt);
5251 if (TREE_CODE (scalar_dest) != SSA_NAME)
5252 return false;
5254 if (STMT_VINFO_DATA_REF (stmt_info))
5255 return false;
5257 code = gimple_assign_rhs_code (stmt);
5258 if (!(gimple_assign_single_p (stmt)
5259 || code == PAREN_EXPR
5260 || CONVERT_EXPR_CODE_P (code)))
5261 return false;
5263 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5264 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5266 /* Multiple types in SLP are handled by creating the appropriate number of
5267 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5268 case of SLP. */
5269 if (slp_node)
5270 ncopies = 1;
5271 else
5272 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5274 gcc_assert (ncopies >= 1);
5276 slp_tree slp_op;
5277 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5278 &dt[0], &vectype_in))
5280 if (dump_enabled_p ())
5281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5282 "use not simple.\n");
5283 return false;
5285 if (!vectype_in)
5286 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5288 /* We can handle NOP_EXPR conversions that do not change the number
5289 of elements or the vector size. */
5290 if ((CONVERT_EXPR_CODE_P (code)
5291 || code == VIEW_CONVERT_EXPR)
5292 && (!vectype_in
5293 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5294 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5295 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5296 return false;
5298 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5299 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5301 if (dump_enabled_p ())
5302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5303 "can't convert between boolean and non "
5304 "boolean vectors %T\n", TREE_TYPE (op));
5306 return false;
5309 /* We do not handle bit-precision changes. */
5310 if ((CONVERT_EXPR_CODE_P (code)
5311 || code == VIEW_CONVERT_EXPR)
5312 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5313 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5314 || !type_has_mode_precision_p (TREE_TYPE (op)))
5315 /* But a conversion that does not change the bit-pattern is ok. */
5316 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5317 > TYPE_PRECISION (TREE_TYPE (op)))
5318 && TYPE_UNSIGNED (TREE_TYPE (op))))
5320 if (dump_enabled_p ())
5321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5322 "type conversion to/from bit-precision "
5323 "unsupported.\n");
5324 return false;
5327 if (!vec_stmt) /* transformation not required. */
5329 if (slp_node
5330 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5332 if (dump_enabled_p ())
5333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5334 "incompatible vector types for invariants\n");
5335 return false;
5337 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5338 DUMP_VECT_SCOPE ("vectorizable_assignment");
5339 if (!vect_nop_conversion_p (stmt_info))
5340 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5341 cost_vec);
5342 return true;
5345 /* Transform. */
5346 if (dump_enabled_p ())
5347 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5349 /* Handle def. */
5350 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5352 /* Handle use. */
5353 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5355 /* Arguments are ready.  Create the new vector stmt. */
5356 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5358 if (CONVERT_EXPR_CODE_P (code)
5359 || code == VIEW_CONVERT_EXPR)
5360 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5361 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5362 new_temp = make_ssa_name (vec_dest, new_stmt);
5363 gimple_assign_set_lhs (new_stmt, new_temp);
5364 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5365 if (slp_node)
5366 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5367 else
5368 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5370 if (!slp_node)
5371 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5373 vec_oprnds.release ();
5374 return true;
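/* Illustrative sketch (not from the GCC sources): vectorizable_assignment
   covers plain copies and mode-preserving conversions, e.g. the loop

     void f (int *restrict d, unsigned int *restrict s, int n)
     {
       for (int i = 0; i < n; i++)
         d[i] = (int) s[i];
     }

   where source and destination vectors have the same size and lane count;
   the conversion is emitted as a VIEW_CONVERT_EXPR of the source vector to
   the destination vector type, while conversions that change the vector
   size or lane count are rejected here and left to vectorizable_conversion.  */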
5378 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5379 either as shift by a scalar or by a vector. */
5381 bool
5382 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5385 machine_mode vec_mode;
5386 optab optab;
5387 int icode;
5388 tree vectype;
5390 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5391 if (!vectype)
5392 return false;
5394 optab = optab_for_tree_code (code, vectype, optab_scalar);
5395 if (!optab
5396 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5398 optab = optab_for_tree_code (code, vectype, optab_vector);
5399 if (!optab
5400 || (optab_handler (optab, TYPE_MODE (vectype))
5401 == CODE_FOR_nothing))
5402 return false;
5405 vec_mode = TYPE_MODE (vectype);
5406 icode = (int) optab_handler (optab, vec_mode);
5407 if (icode == CODE_FOR_nothing)
5408 return false;
5410 return true;
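/* Minimal usage sketch (hypothetical caller, not from this file):

     if (vect_supportable_shift (vinfo, LSHIFT_EXPR, TREE_TYPE (op)))
       ... the shift can be vectorized for this scalar type ...

   Either form of target support is accepted: a vector-shift-by-scalar
   optab (all lanes shifted by the same amount) or a vector-shift-by-vector
   optab (per-lane shift amounts).  */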
5414 /* Function vectorizable_shift.
5416 Check if STMT_INFO performs a shift operation that can be vectorized.
5417 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5418 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5419 Return true if STMT_INFO is vectorizable in this way. */
5421 static bool
5422 vectorizable_shift (vec_info *vinfo,
5423 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5424 gimple **vec_stmt, slp_tree slp_node,
5425 stmt_vector_for_cost *cost_vec)
5427 tree vec_dest;
5428 tree scalar_dest;
5429 tree op0, op1 = NULL;
5430 tree vec_oprnd1 = NULL_TREE;
5431 tree vectype;
5432 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5433 enum tree_code code;
5434 machine_mode vec_mode;
5435 tree new_temp;
5436 optab optab;
5437 int icode;
5438 machine_mode optab_op2_mode;
5439 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5440 int ndts = 2;
5441 poly_uint64 nunits_in;
5442 poly_uint64 nunits_out;
5443 tree vectype_out;
5444 tree op1_vectype;
5445 int ncopies;
5446 int i;
5447 vec<tree> vec_oprnds0 = vNULL;
5448 vec<tree> vec_oprnds1 = vNULL;
5449 tree vop0, vop1;
5450 unsigned int k;
5451 bool scalar_shift_arg = true;
5452 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5453 bool incompatible_op1_vectype_p = false;
5455 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5456 return false;
5458 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5459 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5460 && ! vec_stmt)
5461 return false;
5463 /* Is STMT a vectorizable shift or rotate operation? */
5464 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5465 if (!stmt)
5466 return false;
5468 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5469 return false;
5471 code = gimple_assign_rhs_code (stmt);
5473 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5474 || code == RROTATE_EXPR))
5475 return false;
5477 scalar_dest = gimple_assign_lhs (stmt);
5478 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5479 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5481 if (dump_enabled_p ())
5482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5483 "bit-precision shifts not supported.\n");
5484 return false;
5487 slp_tree slp_op0;
5488 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5489 0, &op0, &slp_op0, &dt[0], &vectype))
5491 if (dump_enabled_p ())
5492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5493 "use not simple.\n");
5494 return false;
5496 /* If op0 is an external or constant def, infer the vector type
5497 from the scalar type. */
5498 if (!vectype)
5499 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5500 if (vec_stmt)
5501 gcc_assert (vectype);
5502 if (!vectype)
5504 if (dump_enabled_p ())
5505 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5506 "no vectype for scalar type\n");
5507 return false;
5510 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5511 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5512 if (maybe_ne (nunits_out, nunits_in))
5513 return false;
5515 stmt_vec_info op1_def_stmt_info;
5516 slp_tree slp_op1;
5517 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5518 &dt[1], &op1_vectype, &op1_def_stmt_info))
5520 if (dump_enabled_p ())
5521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5522 "use not simple.\n");
5523 return false;
5526 /* Multiple types in SLP are handled by creating the appropriate number of
5527 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5528 case of SLP. */
5529 if (slp_node)
5530 ncopies = 1;
5531 else
5532 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5534 gcc_assert (ncopies >= 1);
5536 /* Determine whether the shift amount is a vector or a scalar. If the
5537 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5539 if ((dt[1] == vect_internal_def
5540 || dt[1] == vect_induction_def
5541 || dt[1] == vect_nested_cycle)
5542 && !slp_node)
5543 scalar_shift_arg = false;
5544 else if (dt[1] == vect_constant_def
5545 || dt[1] == vect_external_def
5546 || dt[1] == vect_internal_def)
5548 /* In SLP we need to check whether the shift count is the same in
5549 all statements; in loops, if it is a constant or invariant, it is
5550 always a scalar shift. */
5551 if (slp_node)
5553 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5554 stmt_vec_info slpstmt_info;
5556 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5558 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5559 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5560 scalar_shift_arg = false;
5563 /* For internal SLP defs we have to make sure we see scalar stmts
5564 for all vector elements.
5565 ??? For different vectors we could resort to a different
5566 scalar shift operand but code-generation below simply always
5567 takes the first. */
5568 if (dt[1] == vect_internal_def
5569 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5570 stmts.length ()))
5571 scalar_shift_arg = false;
5574 /* If the shift amount is computed by a pattern stmt we cannot
5575 use the scalar amount directly, so give up and use a vector
5576 shift. */
5577 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5578 scalar_shift_arg = false;
5580 else
5582 if (dump_enabled_p ())
5583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5584 "operand mode requires invariant argument.\n");
5585 return false;
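/* Illustrative examples (not from this file) of the classification above:

     a[i] = b[i] << 3;       dt[1] == vect_constant_def  -> scalar shift arg
     a[i] = b[i] << s;       s loop-invariant            -> scalar shift arg
     a[i] = b[i] << c[i];    dt[1] == vect_internal_def  -> vector shift arg

   In SLP the constant/invariant case additionally requires the same shift
   amount in every scalar statement of the node, as checked above.  */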
5588 /* Vector shifted by vector. */
5589 bool was_scalar_shift_arg = scalar_shift_arg;
5590 if (!scalar_shift_arg)
5592 optab = optab_for_tree_code (code, vectype, optab_vector);
5593 if (dump_enabled_p ())
5594 dump_printf_loc (MSG_NOTE, vect_location,
5595 "vector/vector shift/rotate found.\n");
5597 if (!op1_vectype)
5598 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5599 slp_op1);
5600 incompatible_op1_vectype_p
5601 = (op1_vectype == NULL_TREE
5602 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5603 TYPE_VECTOR_SUBPARTS (vectype))
5604 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5605 if (incompatible_op1_vectype_p
5606 && (!slp_node
5607 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5608 || slp_op1->refcnt != 1))
5610 if (dump_enabled_p ())
5611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5612 "unusable type for last operand in"
5613 " vector/vector shift/rotate.\n");
5614 return false;
5617 /* See if the machine has a vector-shift-by-scalar insn, and if not
5618 then see if it has a vector-shift-by-vector insn. */
5619 else
5621 optab = optab_for_tree_code (code, vectype, optab_scalar);
5622 if (optab
5623 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5625 if (dump_enabled_p ())
5626 dump_printf_loc (MSG_NOTE, vect_location,
5627 "vector/scalar shift/rotate found.\n");
5629 else
5631 optab = optab_for_tree_code (code, vectype, optab_vector);
5632 if (optab
5633 && (optab_handler (optab, TYPE_MODE (vectype))
5634 != CODE_FOR_nothing))
5636 scalar_shift_arg = false;
5638 if (dump_enabled_p ())
5639 dump_printf_loc (MSG_NOTE, vect_location,
5640 "vector/vector shift/rotate found.\n");
5642 if (!op1_vectype)
5643 op1_vectype = get_vectype_for_scalar_type (vinfo,
5644 TREE_TYPE (op1),
5645 slp_op1);
5647 /* Unlike the other binary operators, shifts/rotates have
5648 an int rhs instead of one of the same type as the lhs,
5649 so make sure the scalar has the right type if we are
5650 dealing with vectors of long long/long/short/char. */
5651 incompatible_op1_vectype_p
5652 = (!op1_vectype
5653 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5654 TREE_TYPE (op1)));
5655 if (incompatible_op1_vectype_p
5656 && dt[1] == vect_internal_def)
5658 if (dump_enabled_p ())
5659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5660 "unusable type for last operand in"
5661 " vector/vector shift/rotate.\n");
5662 return false;
5668 /* Supportable by target? */
5669 if (!optab)
5671 if (dump_enabled_p ())
5672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5673 "no optab.\n");
5674 return false;
5676 vec_mode = TYPE_MODE (vectype);
5677 icode = (int) optab_handler (optab, vec_mode);
5678 if (icode == CODE_FOR_nothing)
5680 if (dump_enabled_p ())
5681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5682 "op not supported by target.\n");
5683 return false;
5685 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
5686 if (vect_emulated_vector_p (vectype))
5687 return false;
5689 if (!vec_stmt) /* transformation not required. */
5691 if (slp_node
5692 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5693 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5694 && (!incompatible_op1_vectype_p
5695 || dt[1] == vect_constant_def)
5696 && !vect_maybe_update_slp_op_vectype
5697 (slp_op1,
5698 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5700 if (dump_enabled_p ())
5701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5702 "incompatible vector types for invariants\n");
5703 return false;
5705 /* Now adjust the constant shift amount in place. */
5706 if (slp_node
5707 && incompatible_op1_vectype_p
5708 && dt[1] == vect_constant_def)
5710 for (unsigned i = 0;
5711 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5713 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5714 = fold_convert (TREE_TYPE (vectype),
5715 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5716 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5717 == INTEGER_CST));
5720 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5721 DUMP_VECT_SCOPE ("vectorizable_shift");
5722 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5723 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5724 return true;
5727 /* Transform. */
5729 if (dump_enabled_p ())
5730 dump_printf_loc (MSG_NOTE, vect_location,
5731 "transform binary/unary operation.\n");
5733 if (incompatible_op1_vectype_p && !slp_node)
5735 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5736 op1 = fold_convert (TREE_TYPE (vectype), op1);
5737 if (dt[1] != vect_constant_def)
5738 op1 = vect_init_vector (vinfo, stmt_info, op1,
5739 TREE_TYPE (vectype), NULL);
5742 /* Handle def. */
5743 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5745 if (scalar_shift_arg && dt[1] != vect_internal_def)
5747 /* Vector shl and shr insn patterns can be defined with scalar
5748 operand 2 (shift operand). In this case, use constant or loop
5749 invariant op1 directly, without extending it to vector mode
5750 first. */
5751 optab_op2_mode = insn_data[icode].operand[2].mode;
5752 if (!VECTOR_MODE_P (optab_op2_mode))
5754 if (dump_enabled_p ())
5755 dump_printf_loc (MSG_NOTE, vect_location,
5756 "operand 1 using scalar mode.\n");
5757 vec_oprnd1 = op1;
5758 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5759 vec_oprnds1.quick_push (vec_oprnd1);
5760 /* Store vec_oprnd1 for every vector stmt to be created.
5761 We check during the analysis that all the shift arguments
5762 are the same.
5763 TODO: Allow different constants for different vector
5764 stmts generated for an SLP instance. */
5765 for (k = 0;
5766 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5767 vec_oprnds1.quick_push (vec_oprnd1);
5770 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5772 if (was_scalar_shift_arg)
5774 /* If the argument was the same in all lanes, create
5775 the correctly typed vector shift amount directly. */
5776 op1 = fold_convert (TREE_TYPE (vectype), op1);
5777 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5778 !loop_vinfo ? gsi : NULL);
5779 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5780 !loop_vinfo ? gsi : NULL);
5781 vec_oprnds1.create (slp_node->vec_stmts_size);
5782 for (k = 0; k < slp_node->vec_stmts_size; k++)
5783 vec_oprnds1.quick_push (vec_oprnd1);
5785 else if (dt[1] == vect_constant_def)
5786 /* The constant shift amount has been adjusted in place. */
5788 else
5789 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5792 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5793 (a special case for certain kinds of vector shifts); otherwise,
5794 operand 1 should be of a vector type (the usual case). */
5795 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5796 op0, &vec_oprnds0,
5797 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5799 /* Arguments are ready. Create the new vector stmt. */
5800 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5802 /* For internal defs where we need to use a scalar shift arg,
5803 extract the first lane. */
5804 if (scalar_shift_arg && dt[1] == vect_internal_def)
5806 vop1 = vec_oprnds1[0];
5807 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5808 gassign *new_stmt
5809 = gimple_build_assign (new_temp,
5810 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5811 vop1,
5812 TYPE_SIZE (TREE_TYPE (new_temp)),
5813 bitsize_zero_node));
5814 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5815 vop1 = new_temp;
5817 else
5818 vop1 = vec_oprnds1[i];
5819 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5820 new_temp = make_ssa_name (vec_dest, new_stmt);
5821 gimple_assign_set_lhs (new_stmt, new_temp);
5822 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5823 if (slp_node)
5824 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5825 else
5826 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5829 if (!slp_node)
5830 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5832 vec_oprnds0.release ();
5833 vec_oprnds1.release ();
5835 return true;
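/* Sketch of the code generated by the vector/scalar path above
   (simplified, SSA names hypothetical):

     vect_b = MEM <vector(4) int> [ ... ];
     vect_a = vect_b << s_7;

   i.e. the loop-invariant amount s_7 is used directly as operand 2.  On
   the vector/vector path operand 2 is instead a vector of per-lane
   amounts, built with vect_init_vector when op1 was invariant.  */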
5839 /* Function vectorizable_operation.
5841 Check if STMT_INFO performs a binary, unary or ternary operation that can
5842 be vectorized.
5843 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5844 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5845 Return true if STMT_INFO is vectorizable in this way. */
5847 static bool
5848 vectorizable_operation (vec_info *vinfo,
5849 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5850 gimple **vec_stmt, slp_tree slp_node,
5851 stmt_vector_for_cost *cost_vec)
5853 tree vec_dest;
5854 tree scalar_dest;
5855 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5856 tree vectype;
5857 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5858 enum tree_code code, orig_code;
5859 machine_mode vec_mode;
5860 tree new_temp;
5861 int op_type;
5862 optab optab;
5863 bool target_support_p;
5864 enum vect_def_type dt[3]
5865 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5866 int ndts = 3;
5867 poly_uint64 nunits_in;
5868 poly_uint64 nunits_out;
5869 tree vectype_out;
5870 int ncopies, vec_num;
5871 int i;
5872 vec<tree> vec_oprnds0 = vNULL;
5873 vec<tree> vec_oprnds1 = vNULL;
5874 vec<tree> vec_oprnds2 = vNULL;
5875 tree vop0, vop1, vop2;
5876 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5878 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5879 return false;
5881 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5882 && ! vec_stmt)
5883 return false;
5885 /* Is STMT a vectorizable binary/unary/ternary operation? */
5886 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5887 if (!stmt)
5888 return false;
5890 /* Loads and stores are handled in vectorizable_{load,store}. */
5891 if (STMT_VINFO_DATA_REF (stmt_info))
5892 return false;
5894 orig_code = code = gimple_assign_rhs_code (stmt);
5896 /* Shifts are handled in vectorizable_shift. */
5897 if (code == LSHIFT_EXPR
5898 || code == RSHIFT_EXPR
5899 || code == LROTATE_EXPR
5900 || code == RROTATE_EXPR)
5901 return false;
5903 /* Comparisons are handled in vectorizable_comparison. */
5904 if (TREE_CODE_CLASS (code) == tcc_comparison)
5905 return false;
5907 /* Conditions are handled in vectorizable_condition. */
5908 if (code == COND_EXPR)
5909 return false;
5911 /* For pointer addition and subtraction, we should use the normal
5912 plus and minus for the vector operation. */
5913 if (code == POINTER_PLUS_EXPR)
5914 code = PLUS_EXPR;
5915 if (code == POINTER_DIFF_EXPR)
5916 code = MINUS_EXPR;
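/* For example (illustrative only), a pointer subtraction such as

     ptrdiff_t d = p - q;

   appears in GIMPLE as POINTER_DIFF_EXPR and is vectorized as an ordinary
   MINUS_EXPR on vectors of unsigned pointer-sized elements; the signed
   result is recovered later through the vec_cvt_dest/VIEW_CONVERT_EXPR
   handling below.  */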
5918 /* Support only unary, binary and ternary operations. */
5919 op_type = TREE_CODE_LENGTH (code);
5920 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5922 if (dump_enabled_p ())
5923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5924 "num. args = %d (not unary/binary/ternary op).\n",
5925 op_type);
5926 return false;
5929 scalar_dest = gimple_assign_lhs (stmt);
5930 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5932 /* Most operations cannot handle bit-precision types without extra
5933 truncations. */
5934 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5935 if (!mask_op_p
5936 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5937 /* The exceptions are bitwise binary operations. */
5938 && code != BIT_IOR_EXPR
5939 && code != BIT_XOR_EXPR
5940 && code != BIT_AND_EXPR)
5942 if (dump_enabled_p ())
5943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5944 "bit-precision arithmetic not supported.\n");
5945 return false;
5948 slp_tree slp_op0;
5949 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5950 0, &op0, &slp_op0, &dt[0], &vectype))
5952 if (dump_enabled_p ())
5953 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5954 "use not simple.\n");
5955 return false;
5957 /* If op0 is an external or constant def, infer the vector type
5958 from the scalar type. */
5959 if (!vectype)
5961 /* For a boolean type we cannot determine the vectype from an
5962 invariant value (we don't know whether it is a vector
5963 of booleans or a vector of integers). We use the output
5964 vectype because operations on booleans don't change the
5965 type. */
5966 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5968 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5970 if (dump_enabled_p ())
5971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5972 "not supported operation on bool value.\n");
5973 return false;
5975 vectype = vectype_out;
5977 else
5978 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5979 slp_node);
5981 if (vec_stmt)
5982 gcc_assert (vectype);
5983 if (!vectype)
5985 if (dump_enabled_p ())
5986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5987 "no vectype for scalar type %T\n",
5988 TREE_TYPE (op0));
5990 return false;
5993 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5994 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5995 if (maybe_ne (nunits_out, nunits_in))
5996 return false;
5998 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5999 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6000 if (op_type == binary_op || op_type == ternary_op)
6002 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6003 1, &op1, &slp_op1, &dt[1], &vectype2))
6005 if (dump_enabled_p ())
6006 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6007 "use not simple.\n");
6008 return false;
6011 if (op_type == ternary_op)
6013 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6014 2, &op2, &slp_op2, &dt[2], &vectype3))
6016 if (dump_enabled_p ())
6017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6018 "use not simple.\n");
6019 return false;
6023 /* Multiple types in SLP are handled by creating the appropriate number of
6024 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6025 case of SLP. */
6026 if (slp_node)
6028 ncopies = 1;
6029 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6031 else
6033 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6034 vec_num = 1;
6037 gcc_assert (ncopies >= 1);
6039 /* Reject attempts to combine mask types with nonmask types, e.g. if
6040 we have an AND between a (nonmask) boolean loaded from memory and
6041 a (mask) boolean result of a comparison.
6043 TODO: We could easily fix these cases up using pattern statements. */
6044 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6045 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6046 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6048 if (dump_enabled_p ())
6049 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6050 "mixed mask and nonmask vector types\n");
6051 return false;
6054 /* Supportable by target? */
6056 vec_mode = TYPE_MODE (vectype);
6057 if (code == MULT_HIGHPART_EXPR)
6058 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6059 else
6061 optab = optab_for_tree_code (code, vectype, optab_default);
6062 if (!optab)
6064 if (dump_enabled_p ())
6065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6066 "no optab.\n");
6067 return false;
6069 target_support_p = (optab_handler (optab, vec_mode)
6070 != CODE_FOR_nothing);
6073 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6074 if (!target_support_p)
6076 if (dump_enabled_p ())
6077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6078 "op not supported by target.\n");
6079 /* Check only during analysis. */
6080 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6081 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6082 return false;
6083 if (dump_enabled_p ())
6084 dump_printf_loc (MSG_NOTE, vect_location,
6085 "proceeding using word mode.\n");
6086 using_emulated_vectors_p = true;
6089 if (using_emulated_vectors_p
6090 && !vect_can_vectorize_without_simd_p (code))
6092 if (dump_enabled_p ())
6093 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6094 return false;
6097 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6098 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6099 internal_fn cond_fn = get_conditional_internal_fn (code);
6101 if (!vec_stmt) /* transformation not required. */
6103 /* If this operation is part of a reduction, a fully-masked loop
6104 should only change the active lanes of the reduction chain,
6105 keeping the inactive lanes as-is. */
6106 if (loop_vinfo
6107 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6108 && reduc_idx >= 0)
6110 if (cond_fn == IFN_LAST
6111 || !direct_internal_fn_supported_p (cond_fn, vectype,
6112 OPTIMIZE_FOR_SPEED))
6114 if (dump_enabled_p ())
6115 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6116 "can't use a fully-masked loop because no"
6117 " conditional operation is available.\n");
6118 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6120 else
6121 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6122 vectype, NULL);
6125 /* Put types on constant and invariant SLP children. */
6126 if (slp_node
6127 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6128 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6129 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6131 if (dump_enabled_p ())
6132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6133 "incompatible vector types for invariants\n");
6134 return false;
6137 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6138 DUMP_VECT_SCOPE ("vectorizable_operation");
6139 vect_model_simple_cost (vinfo, stmt_info,
6140 ncopies, dt, ndts, slp_node, cost_vec);
6141 if (using_emulated_vectors_p)
6143 /* The above vect_model_simple_cost call handles constants
6144 in the prologue and (mis-)costs one of the stmts as
6145 vector stmt. See tree-vect-generic.c:do_plus_minus/do_negate
6146 for the actual lowering that will be applied. */
6147 unsigned n
6148 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6149 switch (code)
6151 case PLUS_EXPR:
6152 n *= 5;
6153 break;
6154 case MINUS_EXPR:
6155 n *= 6;
6156 break;
6157 case NEGATE_EXPR:
6158 n *= 4;
6159 break;
6160 default:;
6162 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6164 return true;
6167 /* Transform. */
6169 if (dump_enabled_p ())
6170 dump_printf_loc (MSG_NOTE, vect_location,
6171 "transform binary/unary operation.\n");
6173 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6175 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6176 vectors with unsigned elements, but the result is signed. So, we
6177 need to compute the MINUS_EXPR into a vectype temporary and
6178 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6179 tree vec_cvt_dest = NULL_TREE;
6180 if (orig_code == POINTER_DIFF_EXPR)
6182 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6183 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6185 /* Handle def. */
6186 else
6187 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6189 /* In case the vectorization factor (VF) is bigger than the number
6190 of elements that we can fit in a vectype (nunits), we have to generate
6191 more than one vector stmt, i.e. we need to "unroll" the
6192 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6193 from one copy of the vector stmt to the next, in the field
6194 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6195 stages to find the correct vector defs to be used when vectorizing
6196 stmts that use the defs of the current stmt. The example below
6197 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6198 we need to create 4 vectorized stmts):
6200 before vectorization:
6201 RELATED_STMT VEC_STMT
6202 S1: x = memref - -
6203 S2: z = x + 1 - -
6205 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6206 there):
6207 RELATED_STMT VEC_STMT
6208 VS1_0: vx0 = memref0 VS1_1 -
6209 VS1_1: vx1 = memref1 VS1_2 -
6210 VS1_2: vx2 = memref2 VS1_3 -
6211 VS1_3: vx3 = memref3 - -
6212 S1: x = load - VS1_0
6213 S2: z = x + 1 - -
6215 step2: vectorize stmt S2 (done here):
6216 To vectorize stmt S2 we first need to find the relevant vector
6217 def for the first operand 'x'. This is, as usual, obtained from
6218 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6219 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6220 relevant vector def 'vx0'. Having found 'vx0' we can generate
6221 the vector stmt VS2_0, and as usual, record it in the
6222 STMT_VINFO_VEC_STMT of stmt S2.
6223 When creating the second copy (VS2_1), we obtain the relevant vector
6224 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6225 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6226 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6227 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6228 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6229 chain of stmts and pointers:
6230 RELATED_STMT VEC_STMT
6231 VS1_0: vx0 = memref0 VS1_1 -
6232 VS1_1: vx1 = memref1 VS1_2 -
6233 VS1_2: vx2 = memref2 VS1_3 -
6234 VS1_3: vx3 = memref3 - -
6235 S1: x = load - VS1_0
6236 VS2_0: vz0 = vx0 + v1 VS2_1 -
6237 VS2_1: vz1 = vx1 + v1 VS2_2 -
6238 VS2_2: vz2 = vx2 + v1 VS2_3 -
6239 VS2_3: vz3 = vx3 + v1 - -
6240 S2: z = x + 1 - VS2_0 */
6242 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6243 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6244 /* Arguments are ready. Create the new vector stmt. */
6245 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6247 gimple *new_stmt = NULL;
6248 vop1 = ((op_type == binary_op || op_type == ternary_op)
6249 ? vec_oprnds1[i] : NULL_TREE);
6250 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6251 if (masked_loop_p && reduc_idx >= 0)
6253 /* Perform the operation on active elements only and take
6254 inactive elements from the reduction chain input. */
6255 gcc_assert (!vop2);
6256 vop2 = reduc_idx == 1 ? vop1 : vop0;
6257 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6258 vectype, i);
6259 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6260 vop0, vop1, vop2);
6261 new_temp = make_ssa_name (vec_dest, call);
6262 gimple_call_set_lhs (call, new_temp);
6263 gimple_call_set_nothrow (call, true);
6264 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6265 new_stmt = call;
6267 else
6269 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6270 new_temp = make_ssa_name (vec_dest, new_stmt);
6271 gimple_assign_set_lhs (new_stmt, new_temp);
6272 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6273 if (vec_cvt_dest)
6275 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6276 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6277 new_temp);
6278 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6279 gimple_assign_set_lhs (new_stmt, new_temp);
6280 vect_finish_stmt_generation (vinfo, stmt_info,
6281 new_stmt, gsi);
6284 if (slp_node)
6285 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6286 else
6287 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6290 if (!slp_node)
6291 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6293 vec_oprnds0.release ();
6294 vec_oprnds1.release ();
6295 vec_oprnds2.release ();
6297 return true;
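/* Sketch of the fully-masked reduction path above (simplified, names
   hypothetical): with partial vectors and reduc_idx == 0, the plain

     vect_sum = vect_sum + vect_x;

   is emitted as a conditional internal function that keeps inactive lanes
   equal to the reduction chain input:

     vect_sum = .COND_ADD (loop_mask, vect_sum, vect_x, vect_sum);  */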
6300 /* A helper function to ensure data reference DR_INFO's base alignment. */
6302 static void
6303 ensure_base_align (dr_vec_info *dr_info)
6305 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6306 return;
6308 if (dr_info->base_misaligned)
6310 tree base_decl = dr_info->base_decl;
6312 /* We should only be able to increase the alignment of a base object if
6313 we know what its new alignment should be at compile time. */
6314 unsigned HOST_WIDE_INT align_base_to =
6315 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6317 if (decl_in_symtab_p (base_decl))
6318 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6319 else if (DECL_ALIGN (base_decl) < align_base_to)
6321 SET_DECL_ALIGN (base_decl, align_base_to);
6322 DECL_USER_ALIGN (base_decl) = 1;
6324 dr_info->base_misaligned = false;
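/* Illustrative effect (not from this file): if the vectorized loop accesses

     static double a[1024];

   whose data reference was recorded as base_misaligned with, say, a
   32-byte target alignment, the declaration's alignment (or its symtab
   entry) is simply raised to 32 bytes so the accesses can then be treated
   as aligned.  */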
6329 /* Function get_group_alias_ptr_type.
6331 Return the alias type for the group starting at FIRST_STMT_INFO. */
6333 static tree
6334 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6336 struct data_reference *first_dr, *next_dr;
6338 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6339 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6340 while (next_stmt_info)
6342 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6343 if (get_alias_set (DR_REF (first_dr))
6344 != get_alias_set (DR_REF (next_dr)))
6346 if (dump_enabled_p ())
6347 dump_printf_loc (MSG_NOTE, vect_location,
6348 "conflicting alias set types.\n");
6349 return ptr_type_node;
6351 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6353 return reference_alias_ptr_type (DR_REF (first_dr));
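/* Illustrative example (hypothetical): for a grouped access such as

     p[i].x = ...;  p[i].y = ...;

   where both members have the same type, the references share one alias
   set and the alias pointer type of the first reference is returned; if
   the members had different alias sets, the conservative ptr_type_node
   would be returned instead.  */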
6357 /* Function scan_operand_equal_p.
6359 Helper function for check_scan_store. Compare two references
6360 with .GOMP_SIMD_LANE bases. */
6362 static bool
6363 scan_operand_equal_p (tree ref1, tree ref2)
6365 tree ref[2] = { ref1, ref2 };
6366 poly_int64 bitsize[2], bitpos[2];
6367 tree offset[2], base[2];
6368 for (int i = 0; i < 2; ++i)
6370 machine_mode mode;
6371 int unsignedp, reversep, volatilep = 0;
6372 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6373 &offset[i], &mode, &unsignedp,
6374 &reversep, &volatilep);
6375 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6376 return false;
6377 if (TREE_CODE (base[i]) == MEM_REF
6378 && offset[i] == NULL_TREE
6379 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6381 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6382 if (is_gimple_assign (def_stmt)
6383 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6384 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6385 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6387 if (maybe_ne (mem_ref_offset (base[i]), 0))
6388 return false;
6389 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6390 offset[i] = gimple_assign_rhs2 (def_stmt);
6395 if (!operand_equal_p (base[0], base[1], 0))
6396 return false;
6397 if (maybe_ne (bitsize[0], bitsize[1]))
6398 return false;
6399 if (offset[0] != offset[1])
6401 if (!offset[0] || !offset[1])
6402 return false;
6403 if (!operand_equal_p (offset[0], offset[1], 0))
6405 tree step[2];
6406 for (int i = 0; i < 2; ++i)
6408 step[i] = integer_one_node;
6409 if (TREE_CODE (offset[i]) == SSA_NAME)
6411 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6412 if (is_gimple_assign (def_stmt)
6413 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6414 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6415 == INTEGER_CST))
6417 step[i] = gimple_assign_rhs2 (def_stmt);
6418 offset[i] = gimple_assign_rhs1 (def_stmt);
6421 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6423 step[i] = TREE_OPERAND (offset[i], 1);
6424 offset[i] = TREE_OPERAND (offset[i], 0);
6426 tree rhs1 = NULL_TREE;
6427 if (TREE_CODE (offset[i]) == SSA_NAME)
6429 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6430 if (gimple_assign_cast_p (def_stmt))
6431 rhs1 = gimple_assign_rhs1 (def_stmt);
6433 else if (CONVERT_EXPR_P (offset[i]))
6434 rhs1 = TREE_OPERAND (offset[i], 0);
6435 if (rhs1
6436 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6437 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6438 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6439 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6440 offset[i] = rhs1;
6442 if (!operand_equal_p (offset[0], offset[1], 0)
6443 || !operand_equal_p (step[0], step[1], 0))
6444 return false;
6447 return true;
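/* Sketch of what the helper accepts (simplified, names taken from the
   comments below): both references index the same "omp simd array" by the
   same .GOMP_SIMD_LANE result, e.g.

     D.2042[_20]   and   D.2042[_20]

   where one side may be hidden behind a MEM_REF whose address is &D.2042
   plus the lane number times a constant step; that form is peeled back to
   base, offset and step before the two sides are compared structurally.  */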
6451 enum scan_store_kind {
6452 /* Normal permutation. */
6453 scan_store_kind_perm,
6455 /* Whole vector left shift permutation with zero init. */
6456 scan_store_kind_lshift_zero,
6458 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6459 scan_store_kind_lshift_cond
6462 /* Function scan_store_can_perm_p.
6464 Verify if we can perform the needed permutations or whole vector shifts.
6465 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6466 USE_WHOLE_VECTOR is a vector of enum scan_store_kind indicating which
6467 operation to perform at each step. */
6469 static int
6470 scan_store_can_perm_p (tree vectype, tree init,
6471 vec<enum scan_store_kind> *use_whole_vector = NULL)
6473 enum machine_mode vec_mode = TYPE_MODE (vectype);
6474 unsigned HOST_WIDE_INT nunits;
6475 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6476 return -1;
6477 int units_log2 = exact_log2 (nunits);
6478 if (units_log2 <= 0)
6479 return -1;
6481 int i;
6482 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6483 for (i = 0; i <= units_log2; ++i)
6485 unsigned HOST_WIDE_INT j, k;
6486 enum scan_store_kind kind = scan_store_kind_perm;
6487 vec_perm_builder sel (nunits, nunits, 1);
6488 sel.quick_grow (nunits);
6489 if (i == units_log2)
6491 for (j = 0; j < nunits; ++j)
6492 sel[j] = nunits - 1;
6494 else
6496 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6497 sel[j] = j;
6498 for (k = 0; j < nunits; ++j, ++k)
6499 sel[j] = nunits + k;
6501 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6502 if (!can_vec_perm_const_p (vec_mode, indices))
6504 if (i == units_log2)
6505 return -1;
6507 if (whole_vector_shift_kind == scan_store_kind_perm)
6509 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6510 return -1;
6511 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6512 /* Whole vector shifts shift in zeros, so if init is an all-zeros
6513 constant, there is no need to do anything further. */
6514 if ((TREE_CODE (init) != INTEGER_CST
6515 && TREE_CODE (init) != REAL_CST)
6516 || !initializer_zerop (init))
6518 tree masktype = truth_type_for (vectype);
6519 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6520 return -1;
6521 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6524 kind = whole_vector_shift_kind;
6526 if (use_whole_vector)
6528 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6529 use_whole_vector->safe_grow_cleared (i, true);
6530 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6531 use_whole_vector->safe_push (kind);
6535 return units_log2;
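/* Worked example (assuming an 8-lane vector): for step i == 1 the
   permutation built above selects

     sel = { 0, 1, 8, 9, 10, 11, 12, 13 }

   i.e. keep the first two lanes of the init vector and move the partial
   sums up by two lanes.  If the target cannot do that permutation but
   supports vec_shl_optab, a whole-vector left shift is used instead; it
   shifts in zeros, which is only correct as-is for a zero initializer,
   otherwise a VEC_COND_EXPR patches the low lanes back in
   (scan_store_kind_lshift_cond).  */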
6539 /* Function check_scan_store.
6541 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6543 static bool
6544 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6545 enum vect_def_type rhs_dt, bool slp, tree mask,
6546 vect_memory_access_type memory_access_type)
6548 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6549 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6550 tree ref_type;
6552 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6553 if (slp
6554 || mask
6555 || memory_access_type != VMAT_CONTIGUOUS
6556 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6557 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6558 || loop_vinfo == NULL
6559 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6560 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6561 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6562 || !integer_zerop (DR_INIT (dr_info->dr))
6563 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6564 || !alias_sets_conflict_p (get_alias_set (vectype),
6565 get_alias_set (TREE_TYPE (ref_type))))
6567 if (dump_enabled_p ())
6568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6569 "unsupported OpenMP scan store.\n");
6570 return false;
6573 /* We need to pattern match code built by OpenMP lowering and simplified
6574 by subsequent optimizations into something we can handle.
6575 #pragma omp simd reduction(inscan,+:r)
6576 for (...)
6578 r += something ();
6579 #pragma omp scan inclusive (r)
6580 use (r);
6582 shall have body with:
6583 // Initialization for input phase, store the reduction initializer:
6584 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6585 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6586 D.2042[_21] = 0;
6587 // Actual input phase:
6589 r.0_5 = D.2042[_20];
6590 _6 = _4 + r.0_5;
6591 D.2042[_20] = _6;
6592 // Initialization for scan phase:
6593 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6594 _26 = D.2043[_25];
6595 _27 = D.2042[_25];
6596 _28 = _26 + _27;
6597 D.2043[_25] = _28;
6598 D.2042[_25] = _28;
6599 // Actual scan phase:
6601 r.1_8 = D.2042[_20];
6603 The "omp simd array" variable D.2042 holds the privatized copy used
6604 inside of the loop and D.2043 is another one that holds copies of
6605 the current original list item. The separate GOMP_SIMD_LANE ifn
6606 kinds are there in order to allow optimizing the initializer store
6607 and combiner sequence, e.g. if it is originally some C++ish user
6608 defined reduction, but allow the vectorizer to pattern recognize it
6609 and turn into the appropriate vectorized scan.
6611 For exclusive scan, this is slightly different:
6612 #pragma omp simd reduction(inscan,+:r)
6613 for (...)
6615 use (r);
6616 #pragma omp scan exclusive (r)
6617 r += something ();
6619 shall have body with:
6620 // Initialization for input phase, store the reduction initializer:
6621 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6622 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6623 D.2042[_21] = 0;
6624 // Actual input phase:
6626 r.0_5 = D.2042[_20];
6627 _6 = _4 + r.0_5;
6628 D.2042[_20] = _6;
6629 // Initialization for scan phase:
6630 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6631 _26 = D.2043[_25];
6632 D.2044[_25] = _26;
6633 _27 = D.2042[_25];
6634 _28 = _26 + _27;
6635 D.2043[_25] = _28;
6636 // Actual scan phase:
6638 r.1_8 = D.2044[_20];
6639 ... */
6641 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6643 /* Match the D.2042[_21] = 0; store above. Just require that
6644 it is a constant or external definition store. */
6645 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6647 fail_init:
6648 if (dump_enabled_p ())
6649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6650 "unsupported OpenMP scan initializer store.\n");
6651 return false;
6654 if (! loop_vinfo->scan_map)
6655 loop_vinfo->scan_map = new hash_map<tree, tree>;
6656 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6657 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6658 if (cached)
6659 goto fail_init;
6660 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6662 /* These stores can be vectorized normally. */
6663 return true;
6666 if (rhs_dt != vect_internal_def)
6668 fail:
6669 if (dump_enabled_p ())
6670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6671 "unsupported OpenMP scan combiner pattern.\n");
6672 return false;
6675 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6676 tree rhs = gimple_assign_rhs1 (stmt);
6677 if (TREE_CODE (rhs) != SSA_NAME)
6678 goto fail;
6680 gimple *other_store_stmt = NULL;
6681 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6682 bool inscan_var_store
6683 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6685 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6687 if (!inscan_var_store)
6689 use_operand_p use_p;
6690 imm_use_iterator iter;
6691 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6693 gimple *use_stmt = USE_STMT (use_p);
6694 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6695 continue;
6696 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6697 || !is_gimple_assign (use_stmt)
6698 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6699 || other_store_stmt
6700 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6701 goto fail;
6702 other_store_stmt = use_stmt;
6704 if (other_store_stmt == NULL)
6705 goto fail;
6706 rhs = gimple_assign_lhs (other_store_stmt);
6707 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6708 goto fail;
6711 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6713 use_operand_p use_p;
6714 imm_use_iterator iter;
6715 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6717 gimple *use_stmt = USE_STMT (use_p);
6718 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6719 continue;
6720 if (other_store_stmt)
6721 goto fail;
6722 other_store_stmt = use_stmt;
6725 else
6726 goto fail;
6728 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6729 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6730 || !is_gimple_assign (def_stmt)
6731 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6732 goto fail;
6734 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6735 /* For pointer addition, we should use the normal plus for the vector
6736 operation. */
6737 switch (code)
6739 case POINTER_PLUS_EXPR:
6740 code = PLUS_EXPR;
6741 break;
6742 case MULT_HIGHPART_EXPR:
6743 goto fail;
6744 default:
6745 break;
6747 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6748 goto fail;
6750 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6751 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6752 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6753 goto fail;
6755 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6756 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6757 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6758 || !gimple_assign_load_p (load1_stmt)
6759 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6760 || !gimple_assign_load_p (load2_stmt))
6761 goto fail;
6763 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6764 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6765 if (load1_stmt_info == NULL
6766 || load2_stmt_info == NULL
6767 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6768 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6769 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6770 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6771 goto fail;
6773 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6775 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6776 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6777 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6778 goto fail;
6779 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6780 tree lrhs;
6781 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6782 lrhs = rhs1;
6783 else
6784 lrhs = rhs2;
6785 use_operand_p use_p;
6786 imm_use_iterator iter;
6787 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6789 gimple *use_stmt = USE_STMT (use_p);
6790 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6791 continue;
6792 if (other_store_stmt)
6793 goto fail;
6794 other_store_stmt = use_stmt;
6798 if (other_store_stmt == NULL)
6799 goto fail;
6800 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6801 || !gimple_store_p (other_store_stmt))
6802 goto fail;
6804 stmt_vec_info other_store_stmt_info
6805 = loop_vinfo->lookup_stmt (other_store_stmt);
6806 if (other_store_stmt_info == NULL
6807 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6808 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6809 goto fail;
6811 gimple *stmt1 = stmt;
6812 gimple *stmt2 = other_store_stmt;
6813 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6814 std::swap (stmt1, stmt2);
6815 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6816 gimple_assign_rhs1 (load2_stmt)))
6818 std::swap (rhs1, rhs2);
6819 std::swap (load1_stmt, load2_stmt);
6820 std::swap (load1_stmt_info, load2_stmt_info);
6822 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6823 gimple_assign_rhs1 (load1_stmt)))
6824 goto fail;
6826 tree var3 = NULL_TREE;
6827 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6828 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6829 gimple_assign_rhs1 (load2_stmt)))
6830 goto fail;
6831 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6833 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6834 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6835 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6836 goto fail;
6837 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6838 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6839 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6840 || lookup_attribute ("omp simd inscan exclusive",
6841 DECL_ATTRIBUTES (var3)))
6842 goto fail;
6845 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6846 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6847 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6848 goto fail;
6850 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6851 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6852 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6853 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6854 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6855 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6856 goto fail;
6858 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6859 std::swap (var1, var2);
6861 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6863 if (!lookup_attribute ("omp simd inscan exclusive",
6864 DECL_ATTRIBUTES (var1)))
6865 goto fail;
6866 var1 = var3;
6869 if (loop_vinfo->scan_map == NULL)
6870 goto fail;
6871 tree *init = loop_vinfo->scan_map->get (var1);
6872 if (init == NULL)
6873 goto fail;
6875 /* The IL is as expected, now check if we can actually vectorize it.
6876 Inclusive scan:
6877 _26 = D.2043[_25];
6878 _27 = D.2042[_25];
6879 _28 = _26 + _27;
6880 D.2043[_25] = _28;
6881 D.2042[_25] = _28;
6882 should be vectorized as (where _40 is the vectorized rhs
6883 from the D.2042[_21] = 0; store):
6884 _30 = MEM <vector(8) int> [(int *)&D.2043];
6885 _31 = MEM <vector(8) int> [(int *)&D.2042];
6886 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6887 _33 = _31 + _32;
6888 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6889 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6890 _35 = _33 + _34;
6891 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6892 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6893 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6894 _37 = _35 + _36;
6895 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6896 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6897 _38 = _30 + _37;
6898 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6899 MEM <vector(8) int> [(int *)&D.2043] = _39;
6900 MEM <vector(8) int> [(int *)&D.2042] = _38;
6901 Exclusive scan:
6902 _26 = D.2043[_25];
6903 D.2044[_25] = _26;
6904 _27 = D.2042[_25];
6905 _28 = _26 + _27;
6906 D.2043[_25] = _28;
6907 should be vectorized as (where _40 is the vectorized rhs
6908 from the D.2042[_21] = 0; store):
6909 _30 = MEM <vector(8) int> [(int *)&D.2043];
6910 _31 = MEM <vector(8) int> [(int *)&D.2042];
6911 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6912 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6913 _34 = _32 + _33;
6914 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6915 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6916 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6917 _36 = _34 + _35;
6918 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6919 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6920 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6921 _38 = _36 + _37;
6922 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6923 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6924 _39 = _30 + _38;
6925 _50 = _31 + _39;
6926 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6927 MEM <vector(8) int> [(int *)&D.2044] = _39;
6928 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6929 enum machine_mode vec_mode = TYPE_MODE (vectype);
6930 optab optab = optab_for_tree_code (code, vectype, optab_default);
6931 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6932 goto fail;
6934 int units_log2 = scan_store_can_perm_p (vectype, *init);
6935 if (units_log2 == -1)
6936 goto fail;
6938 return true;
6942 /* Function vectorizable_scan_store.
6944 Helper of vectorizable_store, taking the same arguments as that function.
6945 Handle only the transformation; the checking is done in check_scan_store. */
6947 static bool
6948 vectorizable_scan_store (vec_info *vinfo,
6949 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6950 gimple **vec_stmt, int ncopies)
6952 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6953 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6954 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6955 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6957 if (dump_enabled_p ())
6958 dump_printf_loc (MSG_NOTE, vect_location,
6959 "transform scan store. ncopies = %d\n", ncopies);
6961 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6962 tree rhs = gimple_assign_rhs1 (stmt);
6963 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6965 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6966 bool inscan_var_store
6967 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6969 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6971 use_operand_p use_p;
6972 imm_use_iterator iter;
6973 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6975 gimple *use_stmt = USE_STMT (use_p);
6976 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6977 continue;
6978 rhs = gimple_assign_lhs (use_stmt);
6979 break;
6983 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6984 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6985 if (code == POINTER_PLUS_EXPR)
6986 code = PLUS_EXPR;
6987 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6988 && commutative_tree_code (code));
6989 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6990 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6991 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6992 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6993 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6994 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6995 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6996 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6997 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6998 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6999 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7001 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7003 std::swap (rhs1, rhs2);
7004 std::swap (var1, var2);
7005 std::swap (load1_dr_info, load2_dr_info);
7008 tree *init = loop_vinfo->scan_map->get (var1);
7009 gcc_assert (init);
7011 unsigned HOST_WIDE_INT nunits;
7012 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7013 gcc_unreachable ();
7014 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7015 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7016 gcc_assert (units_log2 > 0);
7017 auto_vec<tree, 16> perms;
7018 perms.quick_grow (units_log2 + 1);
7019 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7020 for (int i = 0; i <= units_log2; ++i)
7022 unsigned HOST_WIDE_INT j, k;
7023 vec_perm_builder sel (nunits, nunits, 1);
7024 sel.quick_grow (nunits);
7025 if (i == units_log2)
7026 for (j = 0; j < nunits; ++j)
7027 sel[j] = nunits - 1;
7028 else
7030 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7031 sel[j] = j;
7032 for (k = 0; j < nunits; ++j, ++k)
7033 sel[j] = nunits + k;
7035 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7036 if (!use_whole_vector.is_empty ()
7037 && use_whole_vector[i] != scan_store_kind_perm)
7039 if (zero_vec == NULL_TREE)
7040 zero_vec = build_zero_cst (vectype);
7041 if (masktype == NULL_TREE
7042 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7043 masktype = truth_type_for (vectype);
7044 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7046 else
7047 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7050 tree vec_oprnd1 = NULL_TREE;
7051 tree vec_oprnd2 = NULL_TREE;
7052 tree vec_oprnd3 = NULL_TREE;
7053 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7054 tree dataref_offset = build_int_cst (ref_type, 0);
7055 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7056 vectype, VMAT_CONTIGUOUS);
7057 tree ldataref_ptr = NULL_TREE;
7058 tree orig = NULL_TREE;
7059 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7060 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7061 auto_vec<tree> vec_oprnds1;
7062 auto_vec<tree> vec_oprnds2;
7063 auto_vec<tree> vec_oprnds3;
7064 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7065 *init, &vec_oprnds1,
7066 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7067 rhs2, &vec_oprnds3);
7068 for (int j = 0; j < ncopies; j++)
7070 vec_oprnd1 = vec_oprnds1[j];
7071 if (ldataref_ptr == NULL)
7072 vec_oprnd2 = vec_oprnds2[j];
7073 vec_oprnd3 = vec_oprnds3[j];
7074 if (j == 0)
7075 orig = vec_oprnd3;
7076 else if (!inscan_var_store)
7077 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7079 if (ldataref_ptr)
7081 vec_oprnd2 = make_ssa_name (vectype);
7082 tree data_ref = fold_build2 (MEM_REF, vectype,
7083 unshare_expr (ldataref_ptr),
7084 dataref_offset);
7085 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7086 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7087 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7088 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7089 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7092 tree v = vec_oprnd2;
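/* Compute the scan of V in UNITS_LOG2 steps: at step I, V is combined
   (using CODE) with a copy of itself shifted up by 2**I lanes, the
   vacated low lanes being filled from the *INIT vector (or fixed up with
   a VEC_COND_EXPR when only a zero vector can be shifted in).  E.g. with
   CODE == PLUS_EXPR, NUNITS == 4 and a zero *INIT, { a, b, c, d }
   becomes { a, a+b, a+b+c, a+b+c+d }.  For exclusive scans the first
   permutation is applied once more, shifting the input by one lane
   before the scan proper.  */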
7093 for (int i = 0; i < units_log2; ++i)
7095 tree new_temp = make_ssa_name (vectype);
7096 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7097 (zero_vec
7098 && (use_whole_vector[i]
7099 != scan_store_kind_perm))
7100 ? zero_vec : vec_oprnd1, v,
7101 perms[i]);
7102 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7103 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7104 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7106 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7108 /* The whole-vector shift shifted in zero bits, but if *init
7109 is not initializer_zerop, we need to replace those elements
7110 with elements from vec_oprnd1.  */
7111 tree_vector_builder vb (masktype, nunits, 1);
7112 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7113 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7114 ? boolean_false_node : boolean_true_node);
7116 tree new_temp2 = make_ssa_name (vectype);
7117 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7118 new_temp, vec_oprnd1);
7119 vect_finish_stmt_generation (vinfo, stmt_info,
7120 g, gsi);
7121 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7122 new_temp = new_temp2;
7125 /* For exclusive scan, perform the perms[i] permutation once
7126 more. */
7127 if (i == 0
7128 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7129 && v == vec_oprnd2)
7131 v = new_temp;
7132 --i;
7133 continue;
7136 tree new_temp2 = make_ssa_name (vectype);
7137 g = gimple_build_assign (new_temp2, code, v, new_temp);
7138 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7139 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7141 v = new_temp2;
7144 tree new_temp = make_ssa_name (vectype);
7145 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7146 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7147 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7149 tree last_perm_arg = new_temp;
7150 /* For exclusive scan, new_temp computed above is the exclusive scan
7151 prefix sum.  Turn it into an inclusive prefix sum for the broadcast
7152 of the last element into orig.  */
7153 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7155 last_perm_arg = make_ssa_name (vectype);
7156 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7157 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7158 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7161 orig = make_ssa_name (vectype);
7162 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7163 last_perm_arg, perms[units_log2]);
7164 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7165 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7167 if (!inscan_var_store)
7169 tree data_ref = fold_build2 (MEM_REF, vectype,
7170 unshare_expr (dataref_ptr),
7171 dataref_offset);
7172 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7173 g = gimple_build_assign (data_ref, new_temp);
7174 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7175 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7179 if (inscan_var_store)
7180 for (int j = 0; j < ncopies; j++)
7182 if (j != 0)
7183 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7185 tree data_ref = fold_build2 (MEM_REF, vectype,
7186 unshare_expr (dataref_ptr),
7187 dataref_offset);
7188 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7189 gimple *g = gimple_build_assign (data_ref, orig);
7190 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7191 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7193 return true;
7197 /* Function vectorizable_store.
7199 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7200 that can be vectorized.
7201 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7202 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7203 Return true if STMT_INFO is vectorizable in this way. */
7205 static bool
7206 vectorizable_store (vec_info *vinfo,
7207 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7208 gimple **vec_stmt, slp_tree slp_node,
7209 stmt_vector_for_cost *cost_vec)
7211 tree data_ref;
7212 tree op;
7213 tree vec_oprnd = NULL_TREE;
7214 tree elem_type;
7215 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7216 class loop *loop = NULL;
7217 machine_mode vec_mode;
7218 tree dummy;
7219 enum vect_def_type rhs_dt = vect_unknown_def_type;
7220 enum vect_def_type mask_dt = vect_unknown_def_type;
7221 tree dataref_ptr = NULL_TREE;
7222 tree dataref_offset = NULL_TREE;
7223 gimple *ptr_incr = NULL;
7224 int ncopies;
7225 int j;
7226 stmt_vec_info first_stmt_info;
7227 bool grouped_store;
7228 unsigned int group_size, i;
7229 vec<tree> oprnds = vNULL;
7230 vec<tree> result_chain = vNULL;
7231 tree offset = NULL_TREE;
7232 vec<tree> vec_oprnds = vNULL;
7233 bool slp = (slp_node != NULL);
7234 unsigned int vec_num;
7235 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7236 tree aggr_type;
7237 gather_scatter_info gs_info;
7238 poly_uint64 vf;
7239 vec_load_store_type vls_type;
7240 tree ref_type;
7242 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7243 return false;
7245 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7246 && ! vec_stmt)
7247 return false;
7249 /* Is vectorizable store? */
7251 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7252 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7254 tree scalar_dest = gimple_assign_lhs (assign);
7255 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7256 && is_pattern_stmt_p (stmt_info))
7257 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7258 if (TREE_CODE (scalar_dest) != ARRAY_REF
7259 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7260 && TREE_CODE (scalar_dest) != INDIRECT_REF
7261 && TREE_CODE (scalar_dest) != COMPONENT_REF
7262 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7263 && TREE_CODE (scalar_dest) != REALPART_EXPR
7264 && TREE_CODE (scalar_dest) != MEM_REF)
7265 return false;
7267 else
7269 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7270 if (!call || !gimple_call_internal_p (call))
7271 return false;
7273 internal_fn ifn = gimple_call_internal_fn (call);
7274 if (!internal_store_fn_p (ifn))
7275 return false;
7277 if (slp_node != NULL)
7279 if (dump_enabled_p ())
7280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7281 "SLP of masked stores not supported.\n");
7282 return false;
7285 int mask_index = internal_fn_mask_index (ifn);
7286 if (mask_index >= 0
7287 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7288 &mask, NULL, &mask_dt, &mask_vectype))
7289 return false;
7292 op = vect_get_store_rhs (stmt_info);
7294 /* Cannot have hybrid store SLP -- that would mean storing to the
7295 same location twice. */
7296 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7298 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7299 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7301 if (loop_vinfo)
7303 loop = LOOP_VINFO_LOOP (loop_vinfo);
7304 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7306 else
7307 vf = 1;
7309 /* Multiple types in SLP are handled by creating the appropriate number of
7310 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7311 case of SLP. */
7312 if (slp)
7313 ncopies = 1;
7314 else
7315 ncopies = vect_get_num_copies (loop_vinfo, vectype);
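/* Outside of SLP, NCOPIES is VF / NUNITS; e.g. a vectorization factor of 8
   with a four-element vectype needs two vector stores per scalar store.  */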
7317 gcc_assert (ncopies >= 1);
7319 /* FORNOW. This restriction should be relaxed. */
7320 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7322 if (dump_enabled_p ())
7323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7324 "multiple types in nested loop.\n");
7325 return false;
7328 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7329 op, &rhs_dt, &rhs_vectype, &vls_type))
7330 return false;
7332 elem_type = TREE_TYPE (vectype);
7333 vec_mode = TYPE_MODE (vectype);
7335 if (!STMT_VINFO_DATA_REF (stmt_info))
7336 return false;
7338 vect_memory_access_type memory_access_type;
7339 enum dr_alignment_support alignment_support_scheme;
7340 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7341 ncopies, &memory_access_type,
7342 &alignment_support_scheme, &gs_info))
7343 return false;
7345 if (mask)
7347 if (memory_access_type == VMAT_CONTIGUOUS)
7349 if (!VECTOR_MODE_P (vec_mode)
7350 || !can_vec_mask_load_store_p (vec_mode,
7351 TYPE_MODE (mask_vectype), false))
7352 return false;
7354 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7355 && (memory_access_type != VMAT_GATHER_SCATTER
7356 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7358 if (dump_enabled_p ())
7359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7360 "unsupported access type for masked store.\n");
7361 return false;
7364 else
7366 /* FORNOW.  In some cases we can vectorize even if the data type is not
7367 supported (e.g. array initialization with 0).  */
7368 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7369 return false;
7372 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7373 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7374 && memory_access_type != VMAT_GATHER_SCATTER
7375 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7376 if (grouped_store)
7378 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7379 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7380 group_size = DR_GROUP_SIZE (first_stmt_info);
7382 else
7384 first_stmt_info = stmt_info;
7385 first_dr_info = dr_info;
7386 group_size = vec_num = 1;
7389 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7391 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7392 memory_access_type))
7393 return false;
7396 if (!vec_stmt) /* transformation not required. */
7398 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7400 if (loop_vinfo
7401 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7402 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7403 group_size, memory_access_type,
7404 &gs_info, mask);
7406 if (slp_node
7407 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7408 vectype))
7410 if (dump_enabled_p ())
7411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7412 "incompatible vector types for invariants\n");
7413 return false;
7416 if (dump_enabled_p ()
7417 && memory_access_type != VMAT_ELEMENTWISE
7418 && memory_access_type != VMAT_GATHER_SCATTER
7419 && alignment_support_scheme != dr_aligned)
7420 dump_printf_loc (MSG_NOTE, vect_location,
7421 "Vectorizing an unaligned access.\n");
7423 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7424 vect_model_store_cost (vinfo, stmt_info, ncopies,
7425 memory_access_type, vls_type, slp_node, cost_vec);
7426 return true;
7428 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7430 /* Transform. */
7432 ensure_base_align (dr_info);
7434 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7436 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7437 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7438 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7439 tree ptr, var, scale, vec_mask;
7440 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7441 tree mask_halfvectype = mask_vectype;
7442 edge pe = loop_preheader_edge (loop);
7443 gimple_seq seq;
7444 basic_block new_bb;
7445 enum { NARROW, NONE, WIDEN } modifier;
7446 poly_uint64 scatter_off_nunits
7447 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
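/* The numbers of offset and data vector elements may differ.  If the
   offset vector is twice as wide (WIDEN), one offset vector serves two
   copies and odd copies permute its high half into place.  If the data
   vector is twice as wide (NARROW), NCOPIES is doubled and odd copies
   permute the high half of the data (and of the mask) into place.  */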
7449 if (known_eq (nunits, scatter_off_nunits))
7450 modifier = NONE;
7451 else if (known_eq (nunits * 2, scatter_off_nunits))
7453 modifier = WIDEN;
7455 /* Currently gathers and scatters are only supported for
7456 fixed-length vectors. */
7457 unsigned int count = scatter_off_nunits.to_constant ();
7458 vec_perm_builder sel (count, count, 1);
7459 for (i = 0; i < (unsigned int) count; ++i)
7460 sel.quick_push (i | (count / 2));
7462 vec_perm_indices indices (sel, 1, count);
7463 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7464 indices);
7465 gcc_assert (perm_mask != NULL_TREE);
7467 else if (known_eq (nunits, scatter_off_nunits * 2))
7469 modifier = NARROW;
7471 /* Currently gathers and scatters are only supported for
7472 fixed-length vectors. */
7473 unsigned int count = nunits.to_constant ();
7474 vec_perm_builder sel (count, count, 1);
7475 for (i = 0; i < (unsigned int) count; ++i)
7476 sel.quick_push (i | (count / 2));
7478 vec_perm_indices indices (sel, 2, count);
7479 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7480 gcc_assert (perm_mask != NULL_TREE);
7481 ncopies *= 2;
7483 if (mask)
7484 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7486 else
7487 gcc_unreachable ();
7489 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7490 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7491 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7492 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7493 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7494 scaletype = TREE_VALUE (arglist);
7496 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7497 && TREE_CODE (rettype) == VOID_TYPE);
7499 ptr = fold_convert (ptrtype, gs_info.base);
7500 if (!is_gimple_min_invariant (ptr))
7502 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7503 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7504 gcc_assert (!new_bb);
7507 if (mask == NULL_TREE)
7509 mask_arg = build_int_cst (masktype, -1);
7510 mask_arg = vect_init_vector (vinfo, stmt_info,
7511 mask_arg, masktype, NULL);
7514 scale = build_int_cst (scaletype, gs_info.scale);
7516 auto_vec<tree> vec_oprnds0;
7517 auto_vec<tree> vec_oprnds1;
7518 auto_vec<tree> vec_masks;
7519 if (mask)
7521 tree mask_vectype = truth_type_for (vectype);
7522 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7523 modifier == NARROW
7524 ? ncopies / 2 : ncopies,
7525 mask, &vec_masks, mask_vectype);
7527 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7528 modifier == WIDEN
7529 ? ncopies / 2 : ncopies,
7530 gs_info.offset, &vec_oprnds0);
7531 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7532 modifier == NARROW
7533 ? ncopies / 2 : ncopies,
7534 op, &vec_oprnds1);
7535 for (j = 0; j < ncopies; ++j)
7537 if (modifier == WIDEN)
7539 if (j & 1)
7540 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7541 perm_mask, stmt_info, gsi);
7542 else
7543 op = vec_oprnd0 = vec_oprnds0[j / 2];
7544 src = vec_oprnd1 = vec_oprnds1[j];
7545 if (mask)
7546 mask_op = vec_mask = vec_masks[j];
7548 else if (modifier == NARROW)
7550 if (j & 1)
7551 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7552 perm_mask, stmt_info, gsi);
7553 else
7554 src = vec_oprnd1 = vec_oprnds1[j / 2];
7555 op = vec_oprnd0 = vec_oprnds0[j];
7556 if (mask)
7557 mask_op = vec_mask = vec_masks[j / 2];
7559 else
7561 op = vec_oprnd0 = vec_oprnds0[j];
7562 src = vec_oprnd1 = vec_oprnds1[j];
7563 if (mask)
7564 mask_op = vec_mask = vec_masks[j];
7567 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7569 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7570 TYPE_VECTOR_SUBPARTS (srctype)));
7571 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7572 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7573 gassign *new_stmt
7574 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7575 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7576 src = var;
7579 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7581 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7582 TYPE_VECTOR_SUBPARTS (idxtype)));
7583 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7584 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7585 gassign *new_stmt
7586 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7587 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7588 op = var;
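/* The builtin takes the mask as a scalar integer (see the MASKTYPE check
   above); view-convert the boolean mask vector to an integer of the same
   size, extending it to MASKTYPE if needed.  In the NARROW case the mask
   is first unpacked to the offset vector's width.  */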
7591 if (mask)
7593 tree utype;
7594 mask_arg = mask_op;
7595 if (modifier == NARROW)
7597 var = vect_get_new_ssa_name (mask_halfvectype,
7598 vect_simple_var);
7599 gassign *new_stmt
7600 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7601 : VEC_UNPACK_LO_EXPR,
7602 mask_op);
7603 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7604 mask_arg = var;
7606 tree optype = TREE_TYPE (mask_arg);
7607 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7608 utype = masktype;
7609 else
7610 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7611 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7612 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7613 gassign *new_stmt
7614 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7615 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7616 mask_arg = var;
7617 if (!useless_type_conversion_p (masktype, utype))
7619 gcc_assert (TYPE_PRECISION (utype)
7620 <= TYPE_PRECISION (masktype));
7621 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7622 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7623 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7624 mask_arg = var;
7628 gcall *new_stmt
7629 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7630 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7632 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7634 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7635 return true;
7637 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7638 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7640 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7641 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7643 if (grouped_store)
7645 /* FORNOW */
7646 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7648 /* We vectorize all the stmts of the interleaving group when we
7649 reach the last stmt in the group. */
7650 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7651 < DR_GROUP_SIZE (first_stmt_info)
7652 && !slp)
7654 *vec_stmt = NULL;
7655 return true;
7658 if (slp)
7660 grouped_store = false;
7661 /* VEC_NUM is the number of vect stmts to be created for this
7662 group. */
7663 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7664 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7665 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7666 == first_stmt_info);
7667 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7668 op = vect_get_store_rhs (first_stmt_info);
7670 else
7671 /* VEC_NUM is the number of vect stmts to be created for this
7672 group. */
7673 vec_num = group_size;
7675 ref_type = get_group_alias_ptr_type (first_stmt_info);
7677 else
7678 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7680 if (dump_enabled_p ())
7681 dump_printf_loc (MSG_NOTE, vect_location,
7682 "transform store. ncopies = %d\n", ncopies);
7684 if (memory_access_type == VMAT_ELEMENTWISE
7685 || memory_access_type == VMAT_STRIDED_SLP)
7687 gimple_stmt_iterator incr_gsi;
7688 bool insert_after;
7689 gimple *incr;
7690 tree offvar;
7691 tree ivstep;
7692 tree running_off;
7693 tree stride_base, stride_step, alias_off;
7694 tree vec_oprnd;
7695 tree dr_offset;
7696 unsigned int g;
7697 /* Checked by get_load_store_type. */
7698 unsigned int const_nunits = nunits.to_constant ();
7700 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7701 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7703 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7704 stride_base
7705 = fold_build_pointer_plus
7706 (DR_BASE_ADDRESS (first_dr_info->dr),
7707 size_binop (PLUS_EXPR,
7708 convert_to_ptrofftype (dr_offset),
7709 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7710 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7712 /* For a store with loop-invariant (but other than power-of-2)
7713 stride (i.e. not a grouped access) like so:
7715 for (i = 0; i < n; i += stride)
7716 array[i] = ...;
7718 we generate a new induction variable and new stores from
7719 the components of the (vectorized) rhs:
7721 for (j = 0; ; j += VF*stride)
7722 vectemp = ...;
7723 tmp1 = vectemp[0];
7724 array[j] = tmp1;
7725 tmp2 = vectemp[1];
7726 array[j + stride] = tmp2;
7730 unsigned nstores = const_nunits;
7731 unsigned lnel = 1;
7732 tree ltype = elem_type;
7733 tree lvectype = vectype;
7734 if (slp)
7736 if (group_size < const_nunits
7737 && const_nunits % group_size == 0)
7739 nstores = const_nunits / group_size;
7740 lnel = group_size;
7741 ltype = build_vector_type (elem_type, group_size);
7742 lvectype = vectype;
7744 /* First check whether the vec_extract optab supports extraction of
7745 the vector elts directly; if not, use the integer-mode fallback below.  */
7746 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7747 machine_mode vmode;
7748 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7749 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7750 group_size).exists (&vmode)
7751 || (convert_optab_handler (vec_extract_optab,
7752 TYPE_MODE (vectype), vmode)
7753 == CODE_FOR_nothing))
7755 /* Try to avoid emitting an extract of vector elements
7756 by performing the extracts using an integer type of the
7757 same size, extracting from a vector of those and then
7758 re-interpreting it as the original vector type if
7759 supported. */
7760 unsigned lsize
7761 = group_size * GET_MODE_BITSIZE (elmode);
7762 unsigned int lnunits = const_nunits / group_size;
7763 /* If we can't construct such a vector fall back to
7764 element extracts from the original vector type and
7765 element size stores. */
7766 if (int_mode_for_size (lsize, 0).exists (&elmode)
7767 && VECTOR_MODE_P (TYPE_MODE (vectype))
7768 && related_vector_mode (TYPE_MODE (vectype), elmode,
7769 lnunits).exists (&vmode)
7770 && (convert_optab_handler (vec_extract_optab,
7771 vmode, elmode)
7772 != CODE_FOR_nothing))
7774 nstores = lnunits;
7775 lnel = group_size;
7776 ltype = build_nonstandard_integer_type (lsize, 1);
7777 lvectype = build_vector_type (ltype, nstores);
7779 /* Else fall back to vector extraction anyway.
7780 Fewer stores are more important than avoiding spilling
7781 of the vector we extract from. Compared to the
7782 construction case in vectorizable_load no store-forwarding
7783 issue exists here for reasonable archs. */
7786 else if (group_size >= const_nunits
7787 && group_size % const_nunits == 0)
7789 nstores = 1;
7790 lnel = const_nunits;
7791 ltype = vectype;
7792 lvectype = vectype;
7794 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7795 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7798 ivstep = stride_step;
7799 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7800 build_int_cst (TREE_TYPE (ivstep), vf));
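/* The IV advances by DR_STEP * VF bytes per vectorized loop iteration.  */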
7802 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7804 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7805 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7806 create_iv (stride_base, ivstep, NULL,
7807 loop, &incr_gsi, insert_after,
7808 &offvar, NULL);
7809 incr = gsi_stmt (incr_gsi);
7811 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7813 alias_off = build_int_cst (ref_type, 0);
7814 stmt_vec_info next_stmt_info = first_stmt_info;
7815 for (g = 0; g < group_size; g++)
7817 running_off = offvar;
7818 if (g)
7820 tree size = TYPE_SIZE_UNIT (ltype);
7821 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7822 size);
7823 tree newoff = copy_ssa_name (running_off, NULL);
7824 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7825 running_off, pos);
7826 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7827 running_off = newoff;
7829 if (!slp)
7830 op = vect_get_store_rhs (next_stmt_info);
7831 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7832 op, &vec_oprnds);
7833 unsigned int group_el = 0;
7834 unsigned HOST_WIDE_INT
7835 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7836 for (j = 0; j < ncopies; j++)
7838 vec_oprnd = vec_oprnds[j];
7839 /* Pun the vector to extract from if necessary. */
7840 if (lvectype != vectype)
7842 tree tem = make_ssa_name (lvectype);
7843 gimple *pun
7844 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7845 lvectype, vec_oprnd));
7846 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7847 vec_oprnd = tem;
7849 for (i = 0; i < nstores; i++)
7851 tree newref, newoff;
7852 gimple *incr, *assign;
7853 tree size = TYPE_SIZE (ltype);
7854 /* Extract the i'th component. */
7855 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7856 bitsize_int (i), size);
7857 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7858 size, pos);
7860 elem = force_gimple_operand_gsi (gsi, elem, true,
7861 NULL_TREE, true,
7862 GSI_SAME_STMT);
7864 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7865 group_el * elsz);
7866 newref = build2 (MEM_REF, ltype,
7867 running_off, this_off);
7868 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7870 /* And store it to *running_off. */
7871 assign = gimple_build_assign (newref, elem);
7872 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7874 group_el += lnel;
7875 if (! slp
7876 || group_el == group_size)
7878 newoff = copy_ssa_name (running_off, NULL);
7879 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7880 running_off, stride_step);
7881 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7883 running_off = newoff;
7884 group_el = 0;
7886 if (g == group_size - 1
7887 && !slp)
7889 if (j == 0 && i == 0)
7890 *vec_stmt = assign;
7891 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7895 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7896 vec_oprnds.release ();
7897 if (slp)
7898 break;
7901 return true;
7904 auto_vec<tree> dr_chain (group_size);
7905 oprnds.create (group_size);
7907 /* Gather-scatter accesses perform only component accesses; alignment
7908 is irrelevant for them.  */
7909 if (memory_access_type == VMAT_GATHER_SCATTER)
7910 alignment_support_scheme = dr_unaligned_supported;
7911 else
7912 alignment_support_scheme
7913 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, false);
7915 gcc_assert (alignment_support_scheme);
7916 vec_loop_masks *loop_masks
7917 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7918 ? &LOOP_VINFO_MASKS (loop_vinfo)
7919 : NULL);
7920 vec_loop_lens *loop_lens
7921 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7922 ? &LOOP_VINFO_LENS (loop_vinfo)
7923 : NULL);
7925 /* We shouldn't use the length-based approach if the loop is fully masked.  */
7926 gcc_assert (!loop_lens || !loop_masks);
7928 /* Targets with store-lane instructions must not require explicit
7929 realignment. vect_supportable_dr_alignment always returns either
7930 dr_aligned or dr_unaligned_supported for masked operations. */
7931 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7932 && !mask
7933 && !loop_masks)
7934 || alignment_support_scheme == dr_aligned
7935 || alignment_support_scheme == dr_unaligned_supported);
7937 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7938 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7939 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7941 tree bump;
7942 tree vec_offset = NULL_TREE;
7943 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7945 aggr_type = NULL_TREE;
7946 bump = NULL_TREE;
7948 else if (memory_access_type == VMAT_GATHER_SCATTER)
7950 aggr_type = elem_type;
7951 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7952 &bump, &vec_offset);
7954 else
7956 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7957 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7958 else
7959 aggr_type = vectype;
7960 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7961 memory_access_type);
7964 if (mask)
7965 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7967 /* In case the vectorization factor (VF) is bigger than the number
7968 of elements that we can fit in a vectype (nunits), we have to generate
7969 more than one vector stmt, i.e. we need to "unroll" the
7970 vector stmt by a factor of VF/nunits.  */
7972 /* In case of interleaving (non-unit grouped access):
7974 S1: &base + 2 = x2
7975 S2: &base = x0
7976 S3: &base + 1 = x1
7977 S4: &base + 3 = x3
7979 We create vectorized stores starting from base address (the access of the
7980 first stmt in the chain (S2 in the above example), when the last store stmt
7981 of the chain (S4) is reached:
7983 VS1: &base = vx2
7984 VS2: &base + vec_size*1 = vx0
7985 VS3: &base + vec_size*2 = vx1
7986 VS4: &base + vec_size*3 = vx3
7988 Then permutation statements are generated:
7990 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7991 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7994 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7995 (the order of the data-refs in the output of vect_permute_store_chain
7996 corresponds to the order of scalar stmts in the interleaving chain - see
7997 the documentation of vect_permute_store_chain()).
7999 In case of both multiple types and interleaving, above vector stores and
8000 permutation stmts are created for every copy. The result vector stmts are
8001 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8002 STMT_VINFO_RELATED_STMT for the next copies.
8005 auto_vec<tree> vec_masks;
8006 tree vec_mask = NULL;
8007 auto_vec<tree> vec_offsets;
8008 auto_vec<vec<tree> > gvec_oprnds;
8009 gvec_oprnds.safe_grow_cleared (group_size, true);
8010 for (j = 0; j < ncopies; j++)
8012 gimple *new_stmt;
8013 if (j == 0)
8015 if (slp)
8017 /* Get vectorized arguments for SLP_NODE. */
8018 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8019 op, &vec_oprnds);
8020 vec_oprnd = vec_oprnds[0];
8022 else
8024 /* For interleaved stores we collect vectorized defs for all the
8025 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8026 used as an input to vect_permute_store_chain().
8028 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8029 and OPRNDS are of size 1. */
8030 stmt_vec_info next_stmt_info = first_stmt_info;
8031 for (i = 0; i < group_size; i++)
8033 /* Since gaps are not supported for interleaved stores,
8034 DR_GROUP_SIZE is the exact number of stmts in the chain.
8035 Therefore, NEXT_STMT_INFO can't be NULL.  In case
8036 there is no interleaving, DR_GROUP_SIZE is 1,
8037 and only one iteration of the loop will be executed.  */
8038 op = vect_get_store_rhs (next_stmt_info);
8039 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8040 ncopies, op, &gvec_oprnds[i]);
8041 vec_oprnd = gvec_oprnds[i][0];
8042 dr_chain.quick_push (gvec_oprnds[i][0]);
8043 oprnds.quick_push (gvec_oprnds[i][0]);
8044 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8046 if (mask)
8048 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8049 mask, &vec_masks, mask_vectype);
8050 vec_mask = vec_masks[0];
8054 /* We should have caught mismatched types earlier.  */
8055 gcc_assert (useless_type_conversion_p (vectype,
8056 TREE_TYPE (vec_oprnd)));
8057 bool simd_lane_access_p
8058 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8059 if (simd_lane_access_p
8060 && !loop_masks
8061 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8062 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8063 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8064 && integer_zerop (DR_INIT (first_dr_info->dr))
8065 && alias_sets_conflict_p (get_alias_set (aggr_type),
8066 get_alias_set (TREE_TYPE (ref_type))))
8068 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8069 dataref_offset = build_int_cst (ref_type, 0);
8071 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8073 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8074 &gs_info, &dataref_ptr,
8075 &vec_offsets);
8076 vec_offset = vec_offsets[0];
8078 else
8079 dataref_ptr
8080 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8081 simd_lane_access_p ? loop : NULL,
8082 offset, &dummy, gsi, &ptr_incr,
8083 simd_lane_access_p, NULL_TREE, bump);
8085 else
8087 /* For interleaved stores we created vectorized defs for all the
8088 defs stored in OPRNDS in the previous iteration (previous copy).
8089 DR_CHAIN is then used as an input to vect_permute_store_chain().
8090 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8091 OPRNDS are of size 1. */
8092 for (i = 0; i < group_size; i++)
8094 vec_oprnd = gvec_oprnds[i][j];
8095 dr_chain[i] = gvec_oprnds[i][j];
8096 oprnds[i] = gvec_oprnds[i][j];
8098 if (mask)
8099 vec_mask = vec_masks[j];
8100 if (dataref_offset)
8101 dataref_offset
8102 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8103 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8104 vec_offset = vec_offsets[j];
8105 else
8106 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8107 stmt_info, bump);
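/* For store-lanes the VEC_NUM vectors of the group are first copied into
   an array variable and then stored with a single (masked) .STORE_LANES
   call, which interleaves the lanes in memory (e.g. st2/st3/st4 on
   AArch64).  */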
8110 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8112 tree vec_array;
8114 /* Get an array into which we can store the individual vectors. */
8115 vec_array = create_vector_array (vectype, vec_num);
8117 /* Invalidate the current contents of VEC_ARRAY. This should
8118 become an RTL clobber too, which prevents the vector registers
8119 from being upward-exposed. */
8120 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8122 /* Store the individual vectors into the array. */
8123 for (i = 0; i < vec_num; i++)
8125 vec_oprnd = dr_chain[i];
8126 write_vector_array (vinfo, stmt_info,
8127 gsi, vec_oprnd, vec_array, i);
8130 tree final_mask = NULL;
8131 if (loop_masks)
8132 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8133 vectype, j);
8134 if (vec_mask)
8135 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8136 vec_mask, gsi);
8138 gcall *call;
8139 if (final_mask)
8141 /* Emit:
8142 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8143 VEC_ARRAY). */
8144 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8145 tree alias_ptr = build_int_cst (ref_type, align);
8146 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8147 dataref_ptr, alias_ptr,
8148 final_mask, vec_array);
8150 else
8152 /* Emit:
8153 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8154 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8155 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8156 vec_array);
8157 gimple_call_set_lhs (call, data_ref);
8159 gimple_call_set_nothrow (call, true);
8160 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8161 new_stmt = call;
8163 /* Record that VEC_ARRAY is now dead. */
8164 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8166 else
8168 new_stmt = NULL;
8169 if (grouped_store)
8171 if (j == 0)
8172 result_chain.create (group_size);
8173 /* Permute. */
8174 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8175 gsi, &result_chain);
8178 stmt_vec_info next_stmt_info = first_stmt_info;
8179 for (i = 0; i < vec_num; i++)
8181 unsigned misalign;
8182 unsigned HOST_WIDE_INT align;
8184 tree final_mask = NULL_TREE;
8185 if (loop_masks)
8186 final_mask = vect_get_loop_mask (gsi, loop_masks,
8187 vec_num * ncopies,
8188 vectype, vec_num * j + i);
8189 if (vec_mask)
8190 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8191 vec_mask, gsi);
8193 if (memory_access_type == VMAT_GATHER_SCATTER)
8195 tree scale = size_int (gs_info.scale);
8196 gcall *call;
8197 if (final_mask)
8198 call = gimple_build_call_internal
8199 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8200 scale, vec_oprnd, final_mask);
8201 else
8202 call = gimple_build_call_internal
8203 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8204 scale, vec_oprnd);
8205 gimple_call_set_nothrow (call, true);
8206 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8207 new_stmt = call;
8208 break;
8211 if (i > 0)
8212 /* Bump the vector pointer. */
8213 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8214 gsi, stmt_info, bump);
8216 if (slp)
8217 vec_oprnd = vec_oprnds[i];
8218 else if (grouped_store)
8219 /* For grouped stores vectorized defs are interleaved in
8220 vect_permute_store_chain(). */
8221 vec_oprnd = result_chain[i];
8223 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8224 if (aligned_access_p (first_dr_info, vectype))
8225 misalign = 0;
8226 else if (dr_misalignment (first_dr_info, vectype)
8227 == DR_MISALIGNMENT_UNKNOWN)
8229 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8230 misalign = 0;
8232 else
8233 misalign = dr_misalignment (first_dr_info, vectype);
8234 if (dataref_offset == NULL_TREE
8235 && TREE_CODE (dataref_ptr) == SSA_NAME)
8236 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8237 misalign);
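/* The alignment we can rely on is the least set bit of MISALIGN | ALIGN
   (MISALIGN == 0 means the full target alignment applies).  */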
8238 align = least_bit_hwi (misalign | align);
8240 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8242 tree perm_mask = perm_mask_for_reverse (vectype);
8243 tree perm_dest = vect_create_destination_var
8244 (vect_get_store_rhs (stmt_info), vectype);
8245 tree new_temp = make_ssa_name (perm_dest);
8247 /* Generate the permute statement. */
8248 gimple *perm_stmt
8249 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8250 vec_oprnd, perm_mask);
8251 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8253 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8254 vec_oprnd = new_temp;
8257 /* Arguments are ready. Create the new vector stmt. */
8258 if (final_mask)
8260 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8261 gcall *call
8262 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8263 dataref_ptr, ptr,
8264 final_mask, vec_oprnd);
8265 gimple_call_set_nothrow (call, true);
8266 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8267 new_stmt = call;
8269 else if (loop_lens)
8271 tree final_len
8272 = vect_get_loop_len (loop_vinfo, loop_lens,
8273 vec_num * ncopies, vec_num * j + i);
8274 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8275 machine_mode vmode = TYPE_MODE (vectype);
8276 opt_machine_mode new_ovmode
8277 = get_len_load_store_mode (vmode, false);
8278 machine_mode new_vmode = new_ovmode.require ();
8279 /* Need conversion if it's wrapped with VnQI. */
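/* Some targets only support len_store on byte vectors; in that case
   get_len_load_store_mode returned the corresponding VnQImode and the
   operand must be view-converted to a byte vector of the same size.  */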
8280 if (vmode != new_vmode)
8282 tree new_vtype
8283 = build_vector_type_for_mode (unsigned_intQI_type_node,
8284 new_vmode);
8285 tree var
8286 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8287 vec_oprnd
8288 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8289 gassign *new_stmt
8290 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8291 vec_oprnd);
8292 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8293 gsi);
8294 vec_oprnd = var;
8296 gcall *call
8297 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8298 ptr, final_len, vec_oprnd);
8299 gimple_call_set_nothrow (call, true);
8300 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8301 new_stmt = call;
8303 else
8305 data_ref = fold_build2 (MEM_REF, vectype,
8306 dataref_ptr,
8307 dataref_offset
8308 ? dataref_offset
8309 : build_int_cst (ref_type, 0));
8310 if (aligned_access_p (first_dr_info, vectype))
8312 else
8313 TREE_TYPE (data_ref)
8314 = build_aligned_type (TREE_TYPE (data_ref),
8315 align * BITS_PER_UNIT);
8316 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8317 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8318 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8321 if (slp)
8322 continue;
8324 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8325 if (!next_stmt_info)
8326 break;
8329 if (!slp)
8331 if (j == 0)
8332 *vec_stmt = new_stmt;
8333 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8337 for (i = 0; i < group_size; ++i)
8339 vec<tree> oprndsi = gvec_oprnds[i];
8340 oprndsi.release ();
8342 oprnds.release ();
8343 result_chain.release ();
8344 vec_oprnds.release ();
8346 return true;
8349 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8350 VECTOR_CST mask. No checks are made that the target platform supports the
8351 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8352 vect_gen_perm_mask_checked. */
8354 tree
8355 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8357 tree mask_type;
8359 poly_uint64 nunits = sel.length ();
8360 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8362 mask_type = build_vector_type (ssizetype, nunits);
8363 return vec_perm_indices_to_tree (mask_type, sel);
8366 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8367 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8369 tree
8370 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8372 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8373 return vect_gen_perm_mask_any (vectype, sel);
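/* As a sketch of typical usage (cf. perm_mask_for_reverse), a
   lane-reversing mask for VECTYPE can be built as

     poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     if (can_vec_perm_const_p (TYPE_MODE (vectype), indices))
       tree mask = vect_gen_perm_mask_checked (vectype, indices);

   and then used as the third operand of a VEC_PERM_EXPR.  */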
8376 /* Given a vector variable X and Y, that was generated for the scalar
8377 STMT_INFO, generate instructions to permute the vector elements of X and Y
8378 using permutation mask MASK_VEC, insert them at *GSI and return the
8379 permuted vector variable. */
8381 static tree
8382 permute_vec_elements (vec_info *vinfo,
8383 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8384 gimple_stmt_iterator *gsi)
8386 tree vectype = TREE_TYPE (x);
8387 tree perm_dest, data_ref;
8388 gimple *perm_stmt;
8390 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8391 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8392 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8393 else
8394 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8395 data_ref = make_ssa_name (perm_dest);
8397 /* Generate the permute statement. */
8398 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8399 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8401 return data_ref;
8404 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8405 inserting them on the loop's preheader edge.  Returns true if we
8406 were successful in doing so (and thus STMT_INFO can then be moved),
8407 otherwise returns false. */
8409 static bool
8410 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8412 ssa_op_iter i;
8413 tree op;
8414 bool any = false;
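/* First pass: check that every in-loop definition feeding STMT_INFO is
   neither a PHI nor dependent on other in-loop definitions, so it can be
   hoisted without recursing; the actual code motion happens below.  */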
8416 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8418 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8419 if (!gimple_nop_p (def_stmt)
8420 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8422 /* Make sure we don't need to recurse.  While we could do
8423 so in simple cases, when there are more complex use webs
8424 we don't have an easy way to preserve stmt order to fulfil
8425 dependencies within them.  */
8426 tree op2;
8427 ssa_op_iter i2;
8428 if (gimple_code (def_stmt) == GIMPLE_PHI)
8429 return false;
8430 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8432 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8433 if (!gimple_nop_p (def_stmt2)
8434 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8435 return false;
8437 any = true;
8441 if (!any)
8442 return true;
8444 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8446 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8447 if (!gimple_nop_p (def_stmt)
8448 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8450 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8451 gsi_remove (&gsi, false);
8452 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8456 return true;
8459 /* vectorizable_load.
8461 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8462 that can be vectorized.
8463 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8464 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8465 Return true if STMT_INFO is vectorizable in this way. */
8467 static bool
8468 vectorizable_load (vec_info *vinfo,
8469 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8470 gimple **vec_stmt, slp_tree slp_node,
8471 stmt_vector_for_cost *cost_vec)
8473 tree scalar_dest;
8474 tree vec_dest = NULL;
8475 tree data_ref = NULL;
8476 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8477 class loop *loop = NULL;
8478 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8479 bool nested_in_vect_loop = false;
8480 tree elem_type;
8481 tree new_temp;
8482 machine_mode mode;
8483 tree dummy;
8484 tree dataref_ptr = NULL_TREE;
8485 tree dataref_offset = NULL_TREE;
8486 gimple *ptr_incr = NULL;
8487 int ncopies;
8488 int i, j;
8489 unsigned int group_size;
8490 poly_uint64 group_gap_adj;
8491 tree msq = NULL_TREE, lsq;
8492 tree offset = NULL_TREE;
8493 tree byte_offset = NULL_TREE;
8494 tree realignment_token = NULL_TREE;
8495 gphi *phi = NULL;
8496 vec<tree> dr_chain = vNULL;
8497 bool grouped_load = false;
8498 stmt_vec_info first_stmt_info;
8499 stmt_vec_info first_stmt_info_for_drptr = NULL;
8500 bool compute_in_loop = false;
8501 class loop *at_loop;
8502 int vec_num;
8503 bool slp = (slp_node != NULL);
8504 bool slp_perm = false;
8505 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8506 poly_uint64 vf;
8507 tree aggr_type;
8508 gather_scatter_info gs_info;
8509 tree ref_type;
8510 enum vect_def_type mask_dt = vect_unknown_def_type;
8512 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8513 return false;
8515 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8516 && ! vec_stmt)
8517 return false;
8519 if (!STMT_VINFO_DATA_REF (stmt_info))
8520 return false;
8522 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8523 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8525 scalar_dest = gimple_assign_lhs (assign);
8526 if (TREE_CODE (scalar_dest) != SSA_NAME)
8527 return false;
8529 tree_code code = gimple_assign_rhs_code (assign);
8530 if (code != ARRAY_REF
8531 && code != BIT_FIELD_REF
8532 && code != INDIRECT_REF
8533 && code != COMPONENT_REF
8534 && code != IMAGPART_EXPR
8535 && code != REALPART_EXPR
8536 && code != MEM_REF
8537 && TREE_CODE_CLASS (code) != tcc_declaration)
8538 return false;
8540 else
8542 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8543 if (!call || !gimple_call_internal_p (call))
8544 return false;
8546 internal_fn ifn = gimple_call_internal_fn (call);
8547 if (!internal_load_fn_p (ifn))
8548 return false;
8550 scalar_dest = gimple_call_lhs (call);
8551 if (!scalar_dest)
8552 return false;
8554 int mask_index = internal_fn_mask_index (ifn);
8555 if (mask_index >= 0
8556 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node,
8557 /* ??? For SLP we only have operands for
8558 the mask operand. */
8559 slp_node ? 0 : mask_index,
8560 &mask, NULL, &mask_dt, &mask_vectype))
8561 return false;
8564 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8565 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8567 if (loop_vinfo)
8569 loop = LOOP_VINFO_LOOP (loop_vinfo);
8570 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8571 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8573 else
8574 vf = 1;
8576 /* Multiple types in SLP are handled by creating the appropriate number of
8577 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8578 case of SLP. */
8579 if (slp)
8580 ncopies = 1;
8581 else
8582 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8584 gcc_assert (ncopies >= 1);
8586 /* FORNOW. This restriction should be relaxed. */
8587 if (nested_in_vect_loop && ncopies > 1)
8589 if (dump_enabled_p ())
8590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8591 "multiple types in nested loop.\n");
8592 return false;
8595 /* Invalidate assumptions made by dependence analysis when vectorization
8596 on the unrolled body effectively re-orders stmts. */
8597 if (ncopies > 1
8598 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8599 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8600 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8602 if (dump_enabled_p ())
8603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8604 "cannot perform implicit CSE when unrolling "
8605 "with negative dependence distance\n");
8606 return false;
8609 elem_type = TREE_TYPE (vectype);
8610 mode = TYPE_MODE (vectype);
8612 /* FORNOW.  In some cases we can vectorize even if the data type is not
8613 supported (e.g. data copies).  */
8614 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8616 if (dump_enabled_p ())
8617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8618 "Aligned load, but unsupported type.\n");
8619 return false;
8622 /* Check if the load is a part of an interleaving chain. */
8623 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8625 grouped_load = true;
8626 /* FORNOW */
8627 gcc_assert (!nested_in_vect_loop);
8628 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8630 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8631 group_size = DR_GROUP_SIZE (first_stmt_info);
8633 /* Refuse non-SLP vectorization of SLP-only groups. */
8634 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8636 if (dump_enabled_p ())
8637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8638 "cannot vectorize load in non-SLP mode.\n");
8639 return false;
8642 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8644 slp_perm = true;
8646 if (!loop_vinfo)
8648 /* In BB vectorization we may not actually use a loaded vector
8649 accessing elements in excess of DR_GROUP_SIZE. */
8650 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8651 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8652 unsigned HOST_WIDE_INT nunits;
8653 unsigned j, k, maxk = 0;
8654 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8655 if (k > maxk)
8656 maxk = k;
8657 tree vectype = SLP_TREE_VECTYPE (slp_node);
8658 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8659 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8661 if (dump_enabled_p ())
8662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8663 "BB vectorization with gaps at the end of "
8664 "a load is not supported\n");
8665 return false;
8669 auto_vec<tree> tem;
8670 unsigned n_perms;
8671 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8672 true, &n_perms))
8674 if (dump_enabled_p ())
8675 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8676 vect_location,
8677 "unsupported load permutation\n");
8678 return false;
8682 /* Invalidate assumptions made by dependence analysis when vectorization
8683 on the unrolled body effectively re-orders stmts. */
8684 if (!PURE_SLP_STMT (stmt_info)
8685 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8686 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8687 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8689 if (dump_enabled_p ())
8690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8691 "cannot perform implicit CSE when performing "
8692 "group loads with negative dependence distance\n");
8693 return false;
8696 else
8697 group_size = 1;
8699 vect_memory_access_type memory_access_type;
8700 enum dr_alignment_support alignment_support_scheme;
8701 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8702 ncopies, &memory_access_type,
8703 &alignment_support_scheme, &gs_info))
8704 return false;
8706 if (mask)
8708 if (memory_access_type == VMAT_CONTIGUOUS)
8710 machine_mode vec_mode = TYPE_MODE (vectype);
8711 if (!VECTOR_MODE_P (vec_mode)
8712 || !can_vec_mask_load_store_p (vec_mode,
8713 TYPE_MODE (mask_vectype), true))
8714 return false;
8716 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8717 && memory_access_type != VMAT_GATHER_SCATTER)
8719 if (dump_enabled_p ())
8720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8721 "unsupported access type for masked load.\n");
8722 return false;
8724 else if (memory_access_type == VMAT_GATHER_SCATTER
8725 && gs_info.ifn == IFN_LAST
8726 && !gs_info.decl)
8728 if (dump_enabled_p ())
8729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8730 "unsupported masked emulated gather.\n");
8731 return false;
8735 if (!vec_stmt) /* transformation not required. */
8737 if (slp_node
8738 && mask
8739 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8740 mask_vectype))
8742 if (dump_enabled_p ())
8743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8744 "incompatible vector types for invariants\n");
8745 return false;
8748 if (!slp)
8749 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8751 if (loop_vinfo
8752 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8753 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8754 group_size, memory_access_type,
8755 &gs_info, mask);
8757 if (dump_enabled_p ()
8758 && memory_access_type != VMAT_ELEMENTWISE
8759 && memory_access_type != VMAT_GATHER_SCATTER
8760 && alignment_support_scheme != dr_aligned)
8761 dump_printf_loc (MSG_NOTE, vect_location,
8762 "Vectorizing an unaligned access.\n");
8764 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8765 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8766 &gs_info, slp_node, cost_vec);
8767 return true;
8770 if (!slp)
8771 gcc_assert (memory_access_type
8772 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8774 if (dump_enabled_p ())
8775 dump_printf_loc (MSG_NOTE, vect_location,
8776 "transform load. ncopies = %d\n", ncopies);
8778 /* Transform. */
8780 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8781 ensure_base_align (dr_info);
8783 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8785 vect_build_gather_load_calls (vinfo,
8786 stmt_info, gsi, vec_stmt, &gs_info, mask);
8787 return true;
8790 if (memory_access_type == VMAT_INVARIANT)
8792 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8793 /* If we have versioned for aliasing or the loop doesn't
8794 have any data dependencies that would preclude this,
8795 then we are sure this is a loop invariant load and
8796 thus we can insert it on the preheader edge. */
8797 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8798 && !nested_in_vect_loop
8799 && hoist_defs_of_uses (stmt_info, loop));
8800 if (hoist_p)
8802 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8803 if (dump_enabled_p ())
8804 dump_printf_loc (MSG_NOTE, vect_location,
8805 "hoisting out of the vectorized loop: %G", stmt);
8806 scalar_dest = copy_ssa_name (scalar_dest);
8807 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8808 gsi_insert_on_edge_immediate
8809 (loop_preheader_edge (loop),
8810 gimple_build_assign (scalar_dest, rhs));
8812 /* These copies are all equivalent, but currently the representation
8813 requires a separate STMT_VINFO_VEC_STMT for each one. */
8814 gimple_stmt_iterator gsi2 = *gsi;
8815 gsi_next (&gsi2);
8816 for (j = 0; j < ncopies; j++)
8818 if (hoist_p)
8819 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8820 vectype, NULL);
8821 else
8822 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8823 vectype, &gsi2);
8824 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8825 if (slp)
8826 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8827 else
8829 if (j == 0)
8830 *vec_stmt = new_stmt;
8831 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8834 return true;
8837 if (memory_access_type == VMAT_ELEMENTWISE
8838 || memory_access_type == VMAT_STRIDED_SLP)
8840 gimple_stmt_iterator incr_gsi;
8841 bool insert_after;
8842 tree offvar;
8843 tree ivstep;
8844 tree running_off;
8845 vec<constructor_elt, va_gc> *v = NULL;
8846 tree stride_base, stride_step, alias_off;
8847 /* Checked by get_load_store_type. */
8848 unsigned int const_nunits = nunits.to_constant ();
8849 unsigned HOST_WIDE_INT cst_offset = 0;
8850 tree dr_offset;
8852 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8853 gcc_assert (!nested_in_vect_loop);
8855 if (grouped_load)
8857 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8858 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8860 else
8862 first_stmt_info = stmt_info;
8863 first_dr_info = dr_info;
8865 if (slp && grouped_load)
8867 group_size = DR_GROUP_SIZE (first_stmt_info);
8868 ref_type = get_group_alias_ptr_type (first_stmt_info);
8870 else
8872 if (grouped_load)
8873 cst_offset
8874 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8875 * vect_get_place_in_interleaving_chain (stmt_info,
8876 first_stmt_info));
8877 group_size = 1;
8878 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8881 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8882 stride_base
8883 = fold_build_pointer_plus
8884 (DR_BASE_ADDRESS (first_dr_info->dr),
8885 size_binop (PLUS_EXPR,
8886 convert_to_ptrofftype (dr_offset),
8887 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8888 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8890 /* For a load with loop-invariant (but other than power-of-2)
8891 stride (i.e. not a grouped access) like so:
8893 for (i = 0; i < n; i += stride)
8894 ... = array[i];
8896 we generate a new induction variable and new accesses to
8897 form a new vector (or vectors, depending on ncopies):
8899 for (j = 0; ; j += VF*stride)
8900 tmp1 = array[j];
8901 tmp2 = array[j + stride];
8903 vectemp = {tmp1, tmp2, ...}
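/* As a concrete instance of the scheme above, assuming VF == 4 and a
   single copy, each iteration of the vectorized loop performs

     tmp0 = array[j];
     tmp1 = array[j + stride];
     tmp2 = array[j + 2 * stride];
     tmp3 = array[j + 3 * stride];
     vectemp = {tmp0, tmp1, tmp2, tmp3};

   so IVSTEP below is STRIDE_STEP scaled by the vectorization factor and
   RUNNING_OFF advances by STRIDE_STEP after every element load.  */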
8906 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8907 build_int_cst (TREE_TYPE (stride_step), vf));
8909 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8911 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8912 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8913 create_iv (stride_base, ivstep, NULL,
8914 loop, &incr_gsi, insert_after,
8915 &offvar, NULL);
8917 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8919 running_off = offvar;
8920 alias_off = build_int_cst (ref_type, 0);
8921 int nloads = const_nunits;
8922 int lnel = 1;
8923 tree ltype = TREE_TYPE (vectype);
8924 tree lvectype = vectype;
8925 auto_vec<tree> dr_chain;
8926 if (memory_access_type == VMAT_STRIDED_SLP)
8928 if (group_size < const_nunits)
8930 /* First check if vec_init optab supports construction from vector
8931 elts directly. Otherwise avoid emitting a constructor of
8932 vector elements by performing the loads using an integer type
8933 of the same size, constructing a vector of those and then
8934 re-interpreting it as the original vector type. This avoids a
8935 huge runtime penalty due to the general inability to perform
8936 store forwarding from smaller stores to a larger load. */
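/* For illustration (types invented): with a V4SI vectype and
   GROUP_SIZE == 2 each group becomes a single 8-byte load, either as a
   V2SI element or as a 64-bit integer depending on what
   vector_vector_composition_type can provide; two such pieces form the
   composition vector, which is then VIEW_CONVERTed back to V4SI instead
   of building a four-element constructor from scalar loads.  */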
8937 tree ptype;
8938 tree vtype
8939 = vector_vector_composition_type (vectype,
8940 const_nunits / group_size,
8941 &ptype);
8942 if (vtype != NULL_TREE)
8944 nloads = const_nunits / group_size;
8945 lnel = group_size;
8946 lvectype = vtype;
8947 ltype = ptype;
8950 else
8952 nloads = 1;
8953 lnel = const_nunits;
8954 ltype = vectype;
8956 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8958 /* Load vector(1) scalar_type if the vectype is a single-element vector. */
8959 else if (nloads == 1)
8960 ltype = vectype;
8962 if (slp)
8964 /* For SLP permutation support we need to load the whole group,
8965 not only the number of vector stmts the permutation result
8966 fits in. */
8967 if (slp_perm)
8969 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8970 variable VF. */
8971 unsigned int const_vf = vf.to_constant ();
8972 ncopies = CEIL (group_size * const_vf, const_nunits);
8973 dr_chain.create (ncopies);
8975 else
8976 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8978 unsigned int group_el = 0;
8979 unsigned HOST_WIDE_INT
8980 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8981 for (j = 0; j < ncopies; j++)
8983 if (nloads > 1)
8984 vec_alloc (v, nloads);
8985 gimple *new_stmt = NULL;
8986 for (i = 0; i < nloads; i++)
8988 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8989 group_el * elsz + cst_offset);
8990 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8991 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8992 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8993 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8994 if (nloads > 1)
8995 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8996 gimple_assign_lhs (new_stmt));
8998 group_el += lnel;
8999 if (! slp
9000 || group_el == group_size)
9002 tree newoff = copy_ssa_name (running_off);
9003 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9004 running_off, stride_step);
9005 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9007 running_off = newoff;
9008 group_el = 0;
9011 if (nloads > 1)
9013 tree vec_inv = build_constructor (lvectype, v);
9014 new_temp = vect_init_vector (vinfo, stmt_info,
9015 vec_inv, lvectype, gsi);
9016 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9017 if (lvectype != vectype)
9019 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9020 VIEW_CONVERT_EXPR,
9021 build1 (VIEW_CONVERT_EXPR,
9022 vectype, new_temp));
9023 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9027 if (slp)
9029 if (slp_perm)
9030 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9031 else
9032 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9034 else
9036 if (j == 0)
9037 *vec_stmt = new_stmt;
9038 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9041 if (slp_perm)
9043 unsigned n_perms;
9044 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9045 false, &n_perms);
9047 return true;
9050 if (memory_access_type == VMAT_GATHER_SCATTER
9051 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9052 grouped_load = false;
9054 if (grouped_load)
9056 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9057 group_size = DR_GROUP_SIZE (first_stmt_info);
9058 /* For SLP vectorization we directly vectorize a subchain
9059 without permutation. */
9060 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9061 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9062 /* For BB vectorization always use the first stmt to base
9063 the data ref pointer on. */
9064 if (bb_vinfo)
9065 first_stmt_info_for_drptr
9066 = vect_find_first_scalar_stmt_in_slp (slp_node);
9068 /* Check if the chain of loads is already vectorized. */
9069 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9070 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9071 ??? But we can only do so if there is exactly one
9072 as we have no way to get at the rest. Leave the CSE
9073 opportunity alone.
9074 ??? With the group load eventually participating
9075 in multiple different permutations (having multiple
9076 slp nodes which refer to the same group) the CSE
9077 is even wrong code. See PR56270. */
9078 && !slp)
9080 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9081 return true;
9083 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9084 group_gap_adj = 0;
9086 /* VEC_NUM is the number of vect stmts to be created for this group. */
9087 if (slp)
9089 grouped_load = false;
9090 /* If an SLP permutation is from N elements to N elements,
9091 and if one vector holds a whole number of N, we can load
9092 the inputs to the permutation in the same way as an
9093 unpermuted sequence. In other cases we need to load the
9094 whole group, not only the number of vector stmts the
9095 permutation result fits in. */
9096 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9097 if (slp_perm
9098 && (group_size != scalar_lanes
9099 || !multiple_p (nunits, group_size)))
9101 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9102 variable VF; see vect_transform_slp_perm_load. */
9103 unsigned int const_vf = vf.to_constant ();
9104 unsigned int const_nunits = nunits.to_constant ();
9105 vec_num = CEIL (group_size * const_vf, const_nunits);
9106 group_gap_adj = vf * group_size - nunits * vec_num;
9108 else
9110 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9111 group_gap_adj
9112 = group_size - scalar_lanes;
9115 else
9116 vec_num = group_size;
9118 ref_type = get_group_alias_ptr_type (first_stmt_info);
9120 else
9122 first_stmt_info = stmt_info;
9123 first_dr_info = dr_info;
9124 group_size = vec_num = 1;
9125 group_gap_adj = 0;
9126 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9129 gcc_assert (alignment_support_scheme);
9130 vec_loop_masks *loop_masks
9131 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9132 ? &LOOP_VINFO_MASKS (loop_vinfo)
9133 : NULL);
9134 vec_loop_lens *loop_lens
9135 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9136 ? &LOOP_VINFO_LENS (loop_vinfo)
9137 : NULL);
9139 /* Shouldn't go with the length-based approach if fully masked. */
9140 gcc_assert (!loop_lens || !loop_masks);
9142 /* Targets with store-lane instructions must not require explicit
9143 realignment. vect_supportable_dr_alignment always returns either
9144 dr_aligned or dr_unaligned_supported for masked operations. */
9145 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9146 && !mask
9147 && !loop_masks)
9148 || alignment_support_scheme == dr_aligned
9149 || alignment_support_scheme == dr_unaligned_supported);
9151 /* In case the vectorization factor (VF) is bigger than the number
9152 of elements that we can fit in a vectype (nunits), we have to generate
9153 more than one vector stmt - i.e - we need to "unroll" the
9154 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9155 from one copy of the vector stmt to the next, in the field
9156 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9157 stages to find the correct vector defs to be used when vectorizing
9158 stmts that use the defs of the current stmt. The example below
9159 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9160 need to create 4 vectorized stmts):
9162 before vectorization:
9163 RELATED_STMT VEC_STMT
9164 S1: x = memref - -
9165 S2: z = x + 1 - -
9167 step 1: vectorize stmt S1:
9168 We first create the vector stmt VS1_0, and, as usual, record a
9169 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9170 Next, we create the vector stmt VS1_1, and record a pointer to
9171 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9172 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9173 stmts and pointers:
9174 RELATED_STMT VEC_STMT
9175 VS1_0: vx0 = memref0 VS1_1 -
9176 VS1_1: vx1 = memref1 VS1_2 -
9177 VS1_2: vx2 = memref2 VS1_3 -
9178 VS1_3: vx3 = memref3 - -
9179 S1: x = load - VS1_0
9180 S2: z = x + 1 - -
9183 /* In case of interleaving (non-unit grouped access):
9185 S1: x2 = &base + 2
9186 S2: x0 = &base
9187 S3: x1 = &base + 1
9188 S4: x3 = &base + 3
9190 Vectorized loads are created in the order of memory accesses
9191 starting from the access of the first stmt of the chain:
9193 VS1: vx0 = &base
9194 VS2: vx1 = &base + vec_size*1
9195 VS3: vx3 = &base + vec_size*2
9196 VS4: vx4 = &base + vec_size*3
9198 Then permutation statements are generated:
9200 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9201 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9204 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9205 (the order of the data-refs in the output of vect_permute_load_chain
9206 corresponds to the order of scalar stmts in the interleaving chain - see
9207 the documentation of vect_permute_load_chain()).
9208 The generation of permutation stmts and recording them in
9209 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9211 In case of both multiple types and interleaving, the vector loads and
9212 permutation stmts above are created for every copy. The result vector
9213 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9214 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
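/* As a concrete instance of the permutation masks above, with
   four-element vectors a two-stream interleaving is split with

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, 4, 6 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, 5, 7 } >

   i.e. the { 0, 2, ..., i*2 } notation with i running up to 3.  */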
9216 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9217 on a target that supports unaligned accesses (dr_unaligned_supported)
9218 we generate the following code:
9219 p = initial_addr;
9220 indx = 0;
9221 loop {
9222 p = p + indx * vectype_size;
9223 vec_dest = *(p);
9224 indx = indx + 1;
9227 Otherwise, the data reference is potentially unaligned on a target that
9228 does not support unaligned accesses (dr_explicit_realign_optimized) -
9229 then generate the following code, in which the data in each iteration is
9230 obtained by two vector loads, one from the previous iteration, and one
9231 from the current iteration:
9232 p1 = initial_addr;
9233 msq_init = *(floor(p1))
9234 p2 = initial_addr + VS - 1;
9235 realignment_token = call target_builtin;
9236 indx = 0;
9237 loop {
9238 p2 = p2 + indx * vectype_size
9239 lsq = *(floor(p2))
9240 vec_dest = realign_load (msq, lsq, realignment_token)
9241 indx = indx + 1;
9242 msq = lsq;
9243 } */
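/* To make the realignment scheme concrete (numbers invented), assume
   16-byte vectors and an address that is 4 bytes past a 16-byte
   boundary.  Then, relative to that address,

     msq = *(floor (p1))   covers bytes  -4 .. 11
     lsq = *(floor (p2))   covers bytes  12 .. 27

   and realign_load (msq, lsq, realignment_token) picks out bytes
   0 .. 15, which straddle the two aligned loads; how the selection is
   expressed is target specific, the token typically encoding the
   misalignment.  */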
9245 /* If the misalignment remains the same throughout the execution of the
9246 loop, we can create the init_addr and permutation mask at the loop
9247 preheader. Otherwise, it needs to be created inside the loop.
9248 This can only occur when vectorizing memory accesses in the inner-loop
9249 nested within an outer-loop that is being vectorized. */
9251 if (nested_in_vect_loop
9252 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9253 GET_MODE_SIZE (TYPE_MODE (vectype))))
9255 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9256 compute_in_loop = true;
9259 bool diff_first_stmt_info
9260 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9262 if ((alignment_support_scheme == dr_explicit_realign_optimized
9263 || alignment_support_scheme == dr_explicit_realign)
9264 && !compute_in_loop)
9266 /* If we have different first_stmt_info, we can't set up realignment
9267 here, since we can't guarantee first_stmt_info DR has been
9268 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9269 distance from first_stmt_info DR instead as below. */
9270 if (!diff_first_stmt_info)
9271 msq = vect_setup_realignment (vinfo,
9272 first_stmt_info, gsi, &realignment_token,
9273 alignment_support_scheme, NULL_TREE,
9274 &at_loop);
9275 if (alignment_support_scheme == dr_explicit_realign_optimized)
9277 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9278 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9279 size_one_node);
9280 gcc_assert (!first_stmt_info_for_drptr);
9283 else
9284 at_loop = loop;
9286 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9287 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9289 tree bump;
9290 tree vec_offset = NULL_TREE;
9291 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9293 aggr_type = NULL_TREE;
9294 bump = NULL_TREE;
9296 else if (memory_access_type == VMAT_GATHER_SCATTER)
9298 aggr_type = elem_type;
9299 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9300 &bump, &vec_offset);
9302 else
9304 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9305 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9306 else
9307 aggr_type = vectype;
9308 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9309 memory_access_type);
9312 vec<tree> vec_offsets = vNULL;
9313 auto_vec<tree> vec_masks;
9314 if (mask)
9315 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9316 mask, &vec_masks, mask_vectype, NULL_TREE);
9317 tree vec_mask = NULL_TREE;
9318 poly_uint64 group_elt = 0;
9319 for (j = 0; j < ncopies; j++)
9321 /* 1. Create the vector or array pointer update chain. */
9322 if (j == 0)
9324 bool simd_lane_access_p
9325 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9326 if (simd_lane_access_p
9327 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9328 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9329 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9330 && integer_zerop (DR_INIT (first_dr_info->dr))
9331 && alias_sets_conflict_p (get_alias_set (aggr_type),
9332 get_alias_set (TREE_TYPE (ref_type)))
9333 && (alignment_support_scheme == dr_aligned
9334 || alignment_support_scheme == dr_unaligned_supported))
9336 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9337 dataref_offset = build_int_cst (ref_type, 0);
9339 else if (diff_first_stmt_info)
9341 dataref_ptr
9342 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9343 aggr_type, at_loop, offset, &dummy,
9344 gsi, &ptr_incr, simd_lane_access_p,
9345 byte_offset, bump);
9346 /* Adjust the pointer by the difference to first_stmt. */
9347 data_reference_p ptrdr
9348 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9349 tree diff
9350 = fold_convert (sizetype,
9351 size_binop (MINUS_EXPR,
9352 DR_INIT (first_dr_info->dr),
9353 DR_INIT (ptrdr)));
9354 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9355 stmt_info, diff);
9356 if (alignment_support_scheme == dr_explicit_realign)
9358 msq = vect_setup_realignment (vinfo,
9359 first_stmt_info_for_drptr, gsi,
9360 &realignment_token,
9361 alignment_support_scheme,
9362 dataref_ptr, &at_loop);
9363 gcc_assert (!compute_in_loop);
9366 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9368 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9369 &gs_info, &dataref_ptr,
9370 &vec_offsets);
9372 else
9373 dataref_ptr
9374 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9375 at_loop,
9376 offset, &dummy, gsi, &ptr_incr,
9377 simd_lane_access_p,
9378 byte_offset, bump);
9379 if (mask)
9380 vec_mask = vec_masks[0];
9382 else
9384 if (dataref_offset)
9385 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9386 bump);
9387 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9388 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9389 stmt_info, bump);
9390 if (mask)
9391 vec_mask = vec_masks[j];
9394 if (grouped_load || slp_perm)
9395 dr_chain.create (vec_num);
9397 gimple *new_stmt = NULL;
9398 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9400 tree vec_array;
9402 vec_array = create_vector_array (vectype, vec_num);
9404 tree final_mask = NULL_TREE;
9405 if (loop_masks)
9406 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9407 vectype, j);
9408 if (vec_mask)
9409 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9410 vec_mask, gsi);
9412 gcall *call;
9413 if (final_mask)
9415 /* Emit:
9416 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9417 VEC_MASK). */
9418 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9419 tree alias_ptr = build_int_cst (ref_type, align);
9420 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9421 dataref_ptr, alias_ptr,
9422 final_mask);
9424 else
9426 /* Emit:
9427 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9428 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9429 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9431 gimple_call_set_lhs (call, vec_array);
9432 gimple_call_set_nothrow (call, true);
9433 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9434 new_stmt = call;
9436 /* Extract each vector into an SSA_NAME. */
9437 for (i = 0; i < vec_num; i++)
9439 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9440 vec_array, i);
9441 dr_chain.quick_push (new_temp);
9444 /* Record the mapping between SSA_NAMEs and statements. */
9445 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9447 /* Record that VEC_ARRAY is now dead. */
9448 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9450 else
9452 for (i = 0; i < vec_num; i++)
9454 tree final_mask = NULL_TREE;
9455 if (loop_masks
9456 && memory_access_type != VMAT_INVARIANT)
9457 final_mask = vect_get_loop_mask (gsi, loop_masks,
9458 vec_num * ncopies,
9459 vectype, vec_num * j + i);
9460 if (vec_mask)
9461 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9462 vec_mask, gsi);
9464 if (i > 0)
9465 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9466 gsi, stmt_info, bump);
9468 /* 2. Create the vector-load in the loop. */
9469 switch (alignment_support_scheme)
9471 case dr_aligned:
9472 case dr_unaligned_supported:
9474 unsigned int misalign;
9475 unsigned HOST_WIDE_INT align;
9477 if (memory_access_type == VMAT_GATHER_SCATTER
9478 && gs_info.ifn != IFN_LAST)
9480 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9481 vec_offset = vec_offsets[j];
9482 tree zero = build_zero_cst (vectype);
9483 tree scale = size_int (gs_info.scale);
9484 gcall *call;
9485 if (final_mask)
9486 call = gimple_build_call_internal
9487 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9488 vec_offset, scale, zero, final_mask);
9489 else
9490 call = gimple_build_call_internal
9491 (IFN_GATHER_LOAD, 4, dataref_ptr,
9492 vec_offset, scale, zero);
9493 gimple_call_set_nothrow (call, true);
9494 new_stmt = call;
9495 data_ref = NULL_TREE;
9496 break;
9498 else if (memory_access_type == VMAT_GATHER_SCATTER)
9500 /* Emulated gather-scatter. */
9501 gcc_assert (!final_mask);
9502 unsigned HOST_WIDE_INT const_nunits
9503 = nunits.to_constant ();
9504 unsigned HOST_WIDE_INT const_offset_nunits
9505 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9506 .to_constant ();
9507 vec<constructor_elt, va_gc> *ctor_elts;
9508 vec_alloc (ctor_elts, const_nunits);
9509 gimple_seq stmts = NULL;
9510 /* We support offset vectors with more elements
9511 than the data vector for now. */
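/* E.g. (illustrative types only): with a V2DI data vector and a V4SI
   offset vector FACTOR is 2, so copy J uses elements (J % 2) * 2 and
   (J % 2) * 2 + 1 of offset vector J / 2.  */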
9512 unsigned HOST_WIDE_INT factor
9513 = const_offset_nunits / const_nunits;
9514 vec_offset = vec_offsets[j / factor];
9515 unsigned elt_offset = (j % factor) * const_nunits;
9516 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9517 tree scale = size_int (gs_info.scale);
9518 align
9519 = get_object_alignment (DR_REF (first_dr_info->dr));
9520 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9521 align);
9522 for (unsigned k = 0; k < const_nunits; ++k)
9524 tree boff = size_binop (MULT_EXPR,
9525 TYPE_SIZE (idx_type),
9526 bitsize_int
9527 (k + elt_offset));
9528 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9529 idx_type, vec_offset,
9530 TYPE_SIZE (idx_type),
9531 boff);
9532 idx = gimple_convert (&stmts, sizetype, idx);
9533 idx = gimple_build (&stmts, MULT_EXPR,
9534 sizetype, idx, scale);
9535 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9536 TREE_TYPE (dataref_ptr),
9537 dataref_ptr, idx);
9538 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9539 tree elt = make_ssa_name (TREE_TYPE (vectype));
9540 tree ref = build2 (MEM_REF, ltype, ptr,
9541 build_int_cst (ref_type, 0));
9542 new_stmt = gimple_build_assign (elt, ref);
9543 gimple_seq_add_stmt (&stmts, new_stmt);
9544 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9546 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9547 new_stmt = gimple_build_assign (NULL_TREE,
9548 build_constructor
9549 (vectype, ctor_elts));
9550 data_ref = NULL_TREE;
9551 break;
9554 align =
9555 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9556 if (alignment_support_scheme == dr_aligned)
9558 gcc_assert (aligned_access_p (first_dr_info, vectype));
9559 misalign = 0;
9561 else if (dr_misalignment (first_dr_info, vectype) == -1)
9563 align = dr_alignment
9564 (vect_dr_behavior (vinfo, first_dr_info));
9565 misalign = 0;
9567 else
9568 misalign = dr_misalignment (first_dr_info, vectype);
9569 if (dataref_offset == NULL_TREE
9570 && TREE_CODE (dataref_ptr) == SSA_NAME)
9571 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9572 align, misalign);
9573 align = least_bit_hwi (misalign | align);
9575 if (final_mask)
9577 tree ptr = build_int_cst (ref_type,
9578 align * BITS_PER_UNIT);
9579 gcall *call
9580 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9581 dataref_ptr, ptr,
9582 final_mask);
9583 gimple_call_set_nothrow (call, true);
9584 new_stmt = call;
9585 data_ref = NULL_TREE;
9587 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9589 tree final_len
9590 = vect_get_loop_len (loop_vinfo, loop_lens,
9591 vec_num * ncopies,
9592 vec_num * j + i);
9593 tree ptr = build_int_cst (ref_type,
9594 align * BITS_PER_UNIT);
9595 gcall *call
9596 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9597 dataref_ptr, ptr,
9598 final_len);
9599 gimple_call_set_nothrow (call, true);
9600 new_stmt = call;
9601 data_ref = NULL_TREE;
9603 /* Need conversion if it's wrapped with VnQI. */
9604 machine_mode vmode = TYPE_MODE (vectype);
9605 opt_machine_mode new_ovmode
9606 = get_len_load_store_mode (vmode, true);
9607 machine_mode new_vmode = new_ovmode.require ();
9608 if (vmode != new_vmode)
9610 tree qi_type = unsigned_intQI_type_node;
9611 tree new_vtype
9612 = build_vector_type_for_mode (qi_type, new_vmode);
9613 tree var = vect_get_new_ssa_name (new_vtype,
9614 vect_simple_var);
9615 gimple_set_lhs (call, var);
9616 vect_finish_stmt_generation (vinfo, stmt_info, call,
9617 gsi);
9618 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9619 new_stmt
9620 = gimple_build_assign (vec_dest,
9621 VIEW_CONVERT_EXPR, op);
9624 else
9626 tree ltype = vectype;
9627 tree new_vtype = NULL_TREE;
9628 unsigned HOST_WIDE_INT gap
9629 = DR_GROUP_GAP (first_stmt_info);
9630 unsigned int vect_align
9631 = vect_known_alignment_in_bytes (first_dr_info,
9632 vectype);
9633 unsigned int scalar_dr_size
9634 = vect_get_scalar_dr_size (first_dr_info);
9635 /* If there's no peeling for gaps but we have a gap
9636 with SLP loads then load the lower half of the
9637 vector only. See get_group_load_store_type for
9638 when we apply this optimization. */
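/* A sketch with invented numbers: for a V4SI vectype, DR_GROUP_SIZE == 4
   and a gap of 2, only the first two ints of each group are accessed, so
   an 8-byte half is loaded and the CONSTRUCTOR below pads the remaining
   lanes with zeros (the zero half comes first for
   VMAT_CONTIGUOUS_REVERSE).  */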
9639 if (slp
9640 && loop_vinfo
9641 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9642 && gap != 0
9643 && known_eq (nunits, (group_size - gap) * 2)
9644 && known_eq (nunits, group_size)
9645 && gap >= (vect_align / scalar_dr_size))
9647 tree half_vtype;
9648 new_vtype
9649 = vector_vector_composition_type (vectype, 2,
9650 &half_vtype);
9651 if (new_vtype != NULL_TREE)
9652 ltype = half_vtype;
9654 tree offset
9655 = (dataref_offset ? dataref_offset
9656 : build_int_cst (ref_type, 0));
9657 if (ltype != vectype
9658 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9660 unsigned HOST_WIDE_INT gap_offset
9661 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9662 tree gapcst = build_int_cst (ref_type, gap_offset);
9663 offset = size_binop (PLUS_EXPR, offset, gapcst);
9665 data_ref
9666 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9667 if (alignment_support_scheme == dr_aligned)
9669 else
9670 TREE_TYPE (data_ref)
9671 = build_aligned_type (TREE_TYPE (data_ref),
9672 align * BITS_PER_UNIT);
9673 if (ltype != vectype)
9675 vect_copy_ref_info (data_ref,
9676 DR_REF (first_dr_info->dr));
9677 tree tem = make_ssa_name (ltype);
9678 new_stmt = gimple_build_assign (tem, data_ref);
9679 vect_finish_stmt_generation (vinfo, stmt_info,
9680 new_stmt, gsi);
9681 data_ref = NULL;
9682 vec<constructor_elt, va_gc> *v;
9683 vec_alloc (v, 2);
9684 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9686 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9687 build_zero_cst (ltype));
9688 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9690 else
9692 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9693 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9694 build_zero_cst (ltype));
9696 gcc_assert (new_vtype != NULL_TREE);
9697 if (new_vtype == vectype)
9698 new_stmt = gimple_build_assign (
9699 vec_dest, build_constructor (vectype, v));
9700 else
9702 tree new_vname = make_ssa_name (new_vtype);
9703 new_stmt = gimple_build_assign (
9704 new_vname, build_constructor (new_vtype, v));
9705 vect_finish_stmt_generation (vinfo, stmt_info,
9706 new_stmt, gsi);
9707 new_stmt = gimple_build_assign (
9708 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9709 new_vname));
9713 break;
9715 case dr_explicit_realign:
9717 tree ptr, bump;
9719 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9721 if (compute_in_loop)
9722 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9723 &realignment_token,
9724 dr_explicit_realign,
9725 dataref_ptr, NULL);
9727 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9728 ptr = copy_ssa_name (dataref_ptr);
9729 else
9730 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9731 // For explicit realign the target alignment should be
9732 // known at compile time.
9733 unsigned HOST_WIDE_INT align =
9734 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9735 new_stmt = gimple_build_assign
9736 (ptr, BIT_AND_EXPR, dataref_ptr,
9737 build_int_cst
9738 (TREE_TYPE (dataref_ptr),
9739 -(HOST_WIDE_INT) align));
9740 vect_finish_stmt_generation (vinfo, stmt_info,
9741 new_stmt, gsi);
9742 data_ref
9743 = build2 (MEM_REF, vectype, ptr,
9744 build_int_cst (ref_type, 0));
9745 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9746 vec_dest = vect_create_destination_var (scalar_dest,
9747 vectype);
9748 new_stmt = gimple_build_assign (vec_dest, data_ref);
9749 new_temp = make_ssa_name (vec_dest, new_stmt);
9750 gimple_assign_set_lhs (new_stmt, new_temp);
9751 gimple_move_vops (new_stmt, stmt_info->stmt);
9752 vect_finish_stmt_generation (vinfo, stmt_info,
9753 new_stmt, gsi);
9754 msq = new_temp;
9756 bump = size_binop (MULT_EXPR, vs,
9757 TYPE_SIZE_UNIT (elem_type));
9758 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9759 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9760 stmt_info, bump);
9761 new_stmt = gimple_build_assign
9762 (NULL_TREE, BIT_AND_EXPR, ptr,
9763 build_int_cst
9764 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9765 ptr = copy_ssa_name (ptr, new_stmt);
9766 gimple_assign_set_lhs (new_stmt, ptr);
9767 vect_finish_stmt_generation (vinfo, stmt_info,
9768 new_stmt, gsi);
9769 data_ref
9770 = build2 (MEM_REF, vectype, ptr,
9771 build_int_cst (ref_type, 0));
9772 break;
9774 case dr_explicit_realign_optimized:
9776 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9777 new_temp = copy_ssa_name (dataref_ptr);
9778 else
9779 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9780 // We should only be doing this if we know the target
9781 // alignment at compile time.
9782 unsigned HOST_WIDE_INT align =
9783 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9784 new_stmt = gimple_build_assign
9785 (new_temp, BIT_AND_EXPR, dataref_ptr,
9786 build_int_cst (TREE_TYPE (dataref_ptr),
9787 -(HOST_WIDE_INT) align));
9788 vect_finish_stmt_generation (vinfo, stmt_info,
9789 new_stmt, gsi);
9790 data_ref
9791 = build2 (MEM_REF, vectype, new_temp,
9792 build_int_cst (ref_type, 0));
9793 break;
9795 default:
9796 gcc_unreachable ();
9798 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9799 /* DATA_REF is null if we've already built the statement. */
9800 if (data_ref)
9802 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9803 new_stmt = gimple_build_assign (vec_dest, data_ref);
9805 new_temp = make_ssa_name (vec_dest, new_stmt);
9806 gimple_set_lhs (new_stmt, new_temp);
9807 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9809 /* 3. Handle explicit realignment if necessary/supported.
9810 Create in loop:
9811 vec_dest = realign_load (msq, lsq, realignment_token) */
9812 if (alignment_support_scheme == dr_explicit_realign_optimized
9813 || alignment_support_scheme == dr_explicit_realign)
9815 lsq = gimple_assign_lhs (new_stmt);
9816 if (!realignment_token)
9817 realignment_token = dataref_ptr;
9818 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9819 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9820 msq, lsq, realignment_token);
9821 new_temp = make_ssa_name (vec_dest, new_stmt);
9822 gimple_assign_set_lhs (new_stmt, new_temp);
9823 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9825 if (alignment_support_scheme == dr_explicit_realign_optimized)
9827 gcc_assert (phi);
9828 if (i == vec_num - 1 && j == ncopies - 1)
9829 add_phi_arg (phi, lsq,
9830 loop_latch_edge (containing_loop),
9831 UNKNOWN_LOCATION);
9832 msq = lsq;
9836 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9838 tree perm_mask = perm_mask_for_reverse (vectype);
9839 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9840 perm_mask, stmt_info, gsi);
9841 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9844 /* Collect vector loads and later create their permutation in
9845 vect_transform_grouped_load (). */
9846 if (grouped_load || slp_perm)
9847 dr_chain.quick_push (new_temp);
9849 /* Store vector loads in the corresponding SLP_NODE. */
9850 if (slp && !slp_perm)
9851 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9853 /* With SLP permutation we load the gaps as well; without
9854 it we need to skip the gaps after we manage to fully load
9855 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9856 group_elt += nunits;
9857 if (maybe_ne (group_gap_adj, 0U)
9858 && !slp_perm
9859 && known_eq (group_elt, group_size - group_gap_adj))
9861 poly_wide_int bump_val
9862 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9863 * group_gap_adj);
9864 if (tree_int_cst_sgn
9865 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9866 bump_val = -bump_val;
9867 tree bump = wide_int_to_tree (sizetype, bump_val);
9868 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9869 gsi, stmt_info, bump);
9870 group_elt = 0;
9873 /* Bump the vector pointer to account for a gap or for excess
9874 elements loaded for a permuted SLP load. */
9875 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9877 poly_wide_int bump_val
9878 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9879 * group_gap_adj);
9880 if (tree_int_cst_sgn
9881 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9882 bump_val = -bump_val;
9883 tree bump = wide_int_to_tree (sizetype, bump_val);
9884 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9885 stmt_info, bump);
9889 if (slp && !slp_perm)
9890 continue;
9892 if (slp_perm)
9894 unsigned n_perms;
9895 /* For SLP we know we've seen all possible uses of dr_chain so
9896 direct vect_transform_slp_perm_load to DCE the unused parts.
9897 ??? This is a hack to prevent compile-time issues as seen
9898 in PR101120 and friends. */
9899 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9900 gsi, vf, false, &n_perms,
9901 nullptr, true);
9902 gcc_assert (ok);
9904 else
9906 if (grouped_load)
9908 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9909 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9910 group_size, gsi);
9911 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9913 else
9915 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9918 dr_chain.release ();
9920 if (!slp)
9921 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9923 return true;
9926 /* Function vect_is_simple_cond.
9928 Input:
9929 LOOP - the loop that is being vectorized.
9930 COND - Condition that is checked for simple use.
9932 Output:
9933 *COMP_VECTYPE - the vector type for the comparison.
9934 *DTS - The def types for the arguments of the comparison.
9936 Returns whether a COND can be vectorized. Checks whether
9937 condition operands are supportable using vect_is_simple_use. */
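/* For example (hypothetical GIMPLE): in

     _5 = a_1 < b_2;
     x_6 = _5 ? c_3 : d_4;

   the SSA_NAME _5 takes the boolean "mask case" path below, whereas in

     x_6 = a_1 < b_2 ? c_3 : d_4;

   COND is a COMPARISON_CLASS_P tree and *COMP_VECTYPE is derived from
   the types of its operands.  */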
9939 static bool
9940 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9941 slp_tree slp_node, tree *comp_vectype,
9942 enum vect_def_type *dts, tree vectype)
9944 tree lhs, rhs;
9945 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9946 slp_tree slp_op;
9948 /* Mask case. */
9949 if (TREE_CODE (cond) == SSA_NAME
9950 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9952 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9953 &slp_op, &dts[0], comp_vectype)
9954 || !*comp_vectype
9955 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9956 return false;
9957 return true;
9960 if (!COMPARISON_CLASS_P (cond))
9961 return false;
9963 lhs = TREE_OPERAND (cond, 0);
9964 rhs = TREE_OPERAND (cond, 1);
9966 if (TREE_CODE (lhs) == SSA_NAME)
9968 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9969 &lhs, &slp_op, &dts[0], &vectype1))
9970 return false;
9972 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9973 || TREE_CODE (lhs) == FIXED_CST)
9974 dts[0] = vect_constant_def;
9975 else
9976 return false;
9978 if (TREE_CODE (rhs) == SSA_NAME)
9980 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9981 &rhs, &slp_op, &dts[1], &vectype2))
9982 return false;
9984 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9985 || TREE_CODE (rhs) == FIXED_CST)
9986 dts[1] = vect_constant_def;
9987 else
9988 return false;
9990 if (vectype1 && vectype2
9991 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9992 TYPE_VECTOR_SUBPARTS (vectype2)))
9993 return false;
9995 *comp_vectype = vectype1 ? vectype1 : vectype2;
9996 /* Invariant comparison. */
9997 if (! *comp_vectype)
9999 tree scalar_type = TREE_TYPE (lhs);
10000 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10001 *comp_vectype = truth_type_for (vectype);
10002 else
10004 /* If we can widen the comparison to match vectype do so. */
10005 if (INTEGRAL_TYPE_P (scalar_type)
10006 && !slp_node
10007 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10008 TYPE_SIZE (TREE_TYPE (vectype))))
10009 scalar_type = build_nonstandard_integer_type
10010 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10011 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10012 slp_node);
10016 return true;
10019 /* vectorizable_condition.
10021 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10022 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10023 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10024 at GSI.
10026 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10028 Return true if STMT_INFO is vectorizable in this way. */
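/* A minimal sketch of the transform, with invented names and an
   arbitrary lane count:

     x_5 = a_1 < b_2 ? c_3 : d_4;

   becomes

     mask_8 = va_6 < vb_7;                     (or a bit-op/masked form)
     vx_9 = VEC_COND_EXPR <mask_8, vc_10, vd_11>;

   while EXTRACT_LAST reductions instead emit an IFN_FOLD_EXTRACT_LAST
   call, as handled further below.  */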
10030 static bool
10031 vectorizable_condition (vec_info *vinfo,
10032 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10033 gimple **vec_stmt,
10034 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10036 tree scalar_dest = NULL_TREE;
10037 tree vec_dest = NULL_TREE;
10038 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10039 tree then_clause, else_clause;
10040 tree comp_vectype = NULL_TREE;
10041 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10042 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10043 tree vec_compare;
10044 tree new_temp;
10045 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10046 enum vect_def_type dts[4]
10047 = {vect_unknown_def_type, vect_unknown_def_type,
10048 vect_unknown_def_type, vect_unknown_def_type};
10049 int ndts = 4;
10050 int ncopies;
10051 int vec_num;
10052 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10053 int i;
10054 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10055 vec<tree> vec_oprnds0 = vNULL;
10056 vec<tree> vec_oprnds1 = vNULL;
10057 vec<tree> vec_oprnds2 = vNULL;
10058 vec<tree> vec_oprnds3 = vNULL;
10059 tree vec_cmp_type;
10060 bool masked = false;
10062 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10063 return false;
10065 /* Is vectorizable conditional operation? */
10066 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10067 if (!stmt)
10068 return false;
10070 code = gimple_assign_rhs_code (stmt);
10071 if (code != COND_EXPR)
10072 return false;
10074 stmt_vec_info reduc_info = NULL;
10075 int reduc_index = -1;
10076 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10077 bool for_reduction
10078 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10079 if (for_reduction)
10081 if (STMT_SLP_TYPE (stmt_info))
10082 return false;
10083 reduc_info = info_for_reduction (vinfo, stmt_info);
10084 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10085 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10086 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10087 || reduc_index != -1);
10089 else
10091 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10092 return false;
10095 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10096 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10098 if (slp_node)
10100 ncopies = 1;
10101 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10103 else
10105 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10106 vec_num = 1;
10109 gcc_assert (ncopies >= 1);
10110 if (for_reduction && ncopies > 1)
10111 return false; /* FORNOW */
10113 cond_expr = gimple_assign_rhs1 (stmt);
10115 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10116 &comp_vectype, &dts[0], vectype)
10117 || !comp_vectype)
10118 return false;
10120 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10121 slp_tree then_slp_node, else_slp_node;
10122 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10123 &then_clause, &then_slp_node, &dts[2], &vectype1))
10124 return false;
10125 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10126 &else_clause, &else_slp_node, &dts[3], &vectype2))
10127 return false;
10129 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10130 return false;
10132 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10133 return false;
10135 masked = !COMPARISON_CLASS_P (cond_expr);
10136 vec_cmp_type = truth_type_for (comp_vectype);
10138 if (vec_cmp_type == NULL_TREE)
10139 return false;
10141 cond_code = TREE_CODE (cond_expr);
10142 if (!masked)
10144 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10145 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10148 /* For conditional reductions, the "then" value needs to be the candidate
10149 value calculated by this iteration while the "else" value needs to be
10150 the result carried over from previous iterations. If the COND_EXPR
10151 is the other way around, we need to swap it. */
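/* For instance (sketch), a conditional reduction written as

     last_4 = a_1 < b_2 ? last_3 : i_5;

   carries the previous result in the "then" arm, so we invert the
   comparison (to a_1 >= b_2 when that is safe, otherwise the mask
   result is inverted) and swap the arms so that the candidate i_5 ends
   up as the "then" value extracted by IFN_FOLD_EXTRACT_LAST.  */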
10152 bool must_invert_cmp_result = false;
10153 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10155 if (masked)
10156 must_invert_cmp_result = true;
10157 else
10159 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10160 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10161 if (new_code == ERROR_MARK)
10162 must_invert_cmp_result = true;
10163 else
10165 cond_code = new_code;
10166 /* Make sure we don't accidentally use the old condition. */
10167 cond_expr = NULL_TREE;
10170 std::swap (then_clause, else_clause);
10173 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10175 /* Boolean values may have another representation in vectors
10176 and therefore we prefer bit operations over comparison for
10177 them (which also works for scalar masks). We store opcodes
10178 to use in bitop1 and bitop2. Statement is vectorized as
10179 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10180 depending on bitop1 and bitop2 arity. */
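/* For instance (sketch): a boolean comparison M1 > M2 is emitted as

     tmp  = ~M2;       <- bitop1 == BIT_NOT_EXPR, applied to the rhs
     mask = M1 & tmp;  <- bitop2 == BIT_AND_EXPR

   since, read as logical values, "M1 > M2" holds exactly when M1 is
   true and M2 is false, independently of how true is encoded.  */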
10181 switch (cond_code)
10183 case GT_EXPR:
10184 bitop1 = BIT_NOT_EXPR;
10185 bitop2 = BIT_AND_EXPR;
10186 break;
10187 case GE_EXPR:
10188 bitop1 = BIT_NOT_EXPR;
10189 bitop2 = BIT_IOR_EXPR;
10190 break;
10191 case LT_EXPR:
10192 bitop1 = BIT_NOT_EXPR;
10193 bitop2 = BIT_AND_EXPR;
10194 std::swap (cond_expr0, cond_expr1);
10195 break;
10196 case LE_EXPR:
10197 bitop1 = BIT_NOT_EXPR;
10198 bitop2 = BIT_IOR_EXPR;
10199 std::swap (cond_expr0, cond_expr1);
10200 break;
10201 case NE_EXPR:
10202 bitop1 = BIT_XOR_EXPR;
10203 break;
10204 case EQ_EXPR:
10205 bitop1 = BIT_XOR_EXPR;
10206 bitop2 = BIT_NOT_EXPR;
10207 break;
10208 default:
10209 return false;
10211 cond_code = SSA_NAME;
10214 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10215 && reduction_type == EXTRACT_LAST_REDUCTION
10216 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10218 if (dump_enabled_p ())
10219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10220 "reduction comparison operation not supported.\n");
10221 return false;
10224 if (!vec_stmt)
10226 if (bitop1 != NOP_EXPR)
10228 machine_mode mode = TYPE_MODE (comp_vectype);
10229 optab optab;
10231 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10232 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10233 return false;
10235 if (bitop2 != NOP_EXPR)
10237 optab = optab_for_tree_code (bitop2, comp_vectype,
10238 optab_default);
10239 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10240 return false;
10244 vect_cost_for_stmt kind = vector_stmt;
10245 if (reduction_type == EXTRACT_LAST_REDUCTION)
10246 /* Count one reduction-like operation per vector. */
10247 kind = vec_to_scalar;
10248 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10249 return false;
10251 if (slp_node
10252 && (!vect_maybe_update_slp_op_vectype
10253 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10254 || (op_adjust == 1
10255 && !vect_maybe_update_slp_op_vectype
10256 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10257 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10258 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10260 if (dump_enabled_p ())
10261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10262 "incompatible vector types for invariants\n");
10263 return false;
10266 if (loop_vinfo && for_reduction
10267 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10269 if (reduction_type == EXTRACT_LAST_REDUCTION)
10270 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10271 ncopies * vec_num, vectype, NULL);
10272 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10273 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10275 if (dump_enabled_p ())
10276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10277 "conditional reduction prevents the use"
10278 " of partial vectors.\n");
10279 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10283 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10284 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10285 cost_vec, kind);
10286 return true;
10289 /* Transform. */
10291 /* Handle def. */
10292 scalar_dest = gimple_assign_lhs (stmt);
10293 if (reduction_type != EXTRACT_LAST_REDUCTION)
10294 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10296 bool swap_cond_operands = false;
10298 /* See whether another part of the vectorized code applies a loop
10299 mask to the condition, or to its inverse. */
10301 vec_loop_masks *masks = NULL;
10302 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10304 if (reduction_type == EXTRACT_LAST_REDUCTION)
10305 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10306 else
10308 scalar_cond_masked_key cond (cond_expr, ncopies);
10309 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10310 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10311 else
10313 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10314 cond.code = invert_tree_comparison (cond.code, honor_nans);
10315 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10317 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10318 cond_code = cond.code;
10319 swap_cond_operands = true;
10325 /* Handle cond expr. */
10326 if (masked)
10327 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10328 cond_expr, &vec_oprnds0, comp_vectype,
10329 then_clause, &vec_oprnds2, vectype,
10330 reduction_type != EXTRACT_LAST_REDUCTION
10331 ? else_clause : NULL, &vec_oprnds3, vectype);
10332 else
10333 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10334 cond_expr0, &vec_oprnds0, comp_vectype,
10335 cond_expr1, &vec_oprnds1, comp_vectype,
10336 then_clause, &vec_oprnds2, vectype,
10337 reduction_type != EXTRACT_LAST_REDUCTION
10338 ? else_clause : NULL, &vec_oprnds3, vectype);
10340 /* Arguments are ready. Create the new vector stmt. */
10341 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10343 vec_then_clause = vec_oprnds2[i];
10344 if (reduction_type != EXTRACT_LAST_REDUCTION)
10345 vec_else_clause = vec_oprnds3[i];
10347 if (swap_cond_operands)
10348 std::swap (vec_then_clause, vec_else_clause);
10350 if (masked)
10351 vec_compare = vec_cond_lhs;
10352 else
10354 vec_cond_rhs = vec_oprnds1[i];
10355 if (bitop1 == NOP_EXPR)
10357 gimple_seq stmts = NULL;
10358 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10359 vec_cond_lhs, vec_cond_rhs);
10360 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10362 else
10364 new_temp = make_ssa_name (vec_cmp_type);
10365 gassign *new_stmt;
10366 if (bitop1 == BIT_NOT_EXPR)
10367 new_stmt = gimple_build_assign (new_temp, bitop1,
10368 vec_cond_rhs);
10369 else
10370 new_stmt
10371 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10372 vec_cond_rhs);
10373 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10374 if (bitop2 == NOP_EXPR)
10375 vec_compare = new_temp;
10376 else if (bitop2 == BIT_NOT_EXPR)
10378 /* Instead of doing ~x ? y : z do x ? z : y. */
10379 vec_compare = new_temp;
10380 std::swap (vec_then_clause, vec_else_clause);
10382 else
10384 vec_compare = make_ssa_name (vec_cmp_type);
10385 new_stmt
10386 = gimple_build_assign (vec_compare, bitop2,
10387 vec_cond_lhs, new_temp);
10388 vect_finish_stmt_generation (vinfo, stmt_info,
10389 new_stmt, gsi);
10394 /* If we decided to apply a loop mask to the result of the vector
10395 comparison, AND the comparison with the mask now. Later passes
10396 should then be able to reuse the AND results between multiple
10397 vector statements.
10399 For example:
10400 for (int i = 0; i < 100; ++i)
10401 x[i] = y[i] ? z[i] : 10;
10403 results in following optimized GIMPLE:
10405 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10406 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10407 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10408 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10409 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10410 vect_iftmp.11_47, { 10, ... }>;
10412 instead of using both masked and unmasked forms of
10413 vec != { 0, ... } (masked in the MASK_LOAD,
10414 unmasked in the VEC_COND_EXPR). */
10416 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10417 in cases where that's necessary. */
10419 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10421 if (!is_gimple_val (vec_compare))
10423 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10424 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10425 vec_compare);
10426 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10427 vec_compare = vec_compare_name;
10430 if (must_invert_cmp_result)
10432 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10433 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10434 BIT_NOT_EXPR,
10435 vec_compare);
10436 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10437 vec_compare = vec_compare_name;
10440 if (masks)
10442 tree loop_mask
10443 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10444 vectype, i);
10445 tree tmp2 = make_ssa_name (vec_cmp_type);
10446 gassign *g
10447 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10448 loop_mask);
10449 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10450 vec_compare = tmp2;
10454 gimple *new_stmt;
10455 if (reduction_type == EXTRACT_LAST_REDUCTION)
10457 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10458 tree lhs = gimple_get_lhs (old_stmt);
10459 new_stmt = gimple_build_call_internal
10460 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10461 vec_then_clause);
10462 gimple_call_set_lhs (new_stmt, lhs);
10463 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10464 if (old_stmt == gsi_stmt (*gsi))
10465 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10466 else
10468 /* In this case we're moving the definition to later in the
10469 block. That doesn't matter because the only uses of the
10470 lhs are in phi statements. */
10471 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10472 gsi_remove (&old_gsi, true);
10473 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10476 else
10478 new_temp = make_ssa_name (vec_dest);
10479 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10480 vec_then_clause, vec_else_clause);
10481 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10483 if (slp_node)
10484 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10485 else
10486 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10489 if (!slp_node)
10490 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10492 vec_oprnds0.release ();
10493 vec_oprnds1.release ();
10494 vec_oprnds2.release ();
10495 vec_oprnds3.release ();
10497 return true;
10500 /* vectorizable_comparison.
10502 Check if STMT_INFO is a comparison expression that can be vectorized.
10503 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10504 comparison, put it in VEC_STMT, and insert it at GSI.
10506 Return true if STMT_INFO is vectorizable in this way. */
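/* A minimal sketch with invented names: a mask-producing statement

     m_3 = a_1 < b_2;

   is vectorized into a single vector comparison

     vm_6 = va_4 < vb_5;

   whose result has the statement's vector boolean (mask) type; when the
   operands are themselves masks, the bit-operation lowering described
   below is used instead.  */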
10508 static bool
10509 vectorizable_comparison (vec_info *vinfo,
10510 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10511 gimple **vec_stmt,
10512 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10514 tree lhs, rhs1, rhs2;
10515 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10516 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10517 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10518 tree new_temp;
10519 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10520 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10521 int ndts = 2;
10522 poly_uint64 nunits;
10523 int ncopies;
10524 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10525 int i;
10526 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10527 vec<tree> vec_oprnds0 = vNULL;
10528 vec<tree> vec_oprnds1 = vNULL;
10529 tree mask_type;
10530 tree mask;
10532 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10533 return false;
10535 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10536 return false;
10538 mask_type = vectype;
10539 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10541 if (slp_node)
10542 ncopies = 1;
10543 else
10544 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10546 gcc_assert (ncopies >= 1);
10547 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10548 return false;
10550 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10551 if (!stmt)
10552 return false;
10554 code = gimple_assign_rhs_code (stmt);
10556 if (TREE_CODE_CLASS (code) != tcc_comparison)
10557 return false;
10559 slp_tree slp_rhs1, slp_rhs2;
10560 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10561 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10562 return false;
10564 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10565 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10566 return false;
10568 if (vectype1 && vectype2
10569 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10570 TYPE_VECTOR_SUBPARTS (vectype2)))
10571 return false;
10573 vectype = vectype1 ? vectype1 : vectype2;
10575 /* Invariant comparison. */
10576 if (!vectype)
10578 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10579 vectype = mask_type;
10580 else
10581 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10582 slp_node);
10583 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10584 return false;
10586 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10587 return false;
10589 /* Can't compare mask and non-mask types. */
10590 if (vectype1 && vectype2
10591 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10592 return false;
10594 /* Boolean values may have another representation in vectors
10595 and therefore we prefer bit operations over comparison for
10596 them (which also works for scalar masks). We store opcodes
10597 to use in bitop1 and bitop2. Statement is vectorized as
10598 BITOP2 (rhs1 BITOP1 rhs2) or
10599 rhs1 BITOP2 (BITOP1 rhs2)
10600 depending on bitop1 and bitop2 arity. */
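/* For instance (sketch): for mask operands M1 >= M2 this emits

     tmp = ~M2;       <- bitop1 == BIT_NOT_EXPR
     res = M1 | tmp;  <- bitop2 == BIT_IOR_EXPR

   i.e. "M1 true or M2 false", while M1 < M2 uses the NOT/AND pair with
   the operands swapped (SWAP_P), yielding M2 & ~M1.  */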
10601 bool swap_p = false;
10602 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10604 if (code == GT_EXPR)
10606 bitop1 = BIT_NOT_EXPR;
10607 bitop2 = BIT_AND_EXPR;
10609 else if (code == GE_EXPR)
10611 bitop1 = BIT_NOT_EXPR;
10612 bitop2 = BIT_IOR_EXPR;
10614 else if (code == LT_EXPR)
10616 bitop1 = BIT_NOT_EXPR;
10617 bitop2 = BIT_AND_EXPR;
10618 swap_p = true;
10620 else if (code == LE_EXPR)
10622 bitop1 = BIT_NOT_EXPR;
10623 bitop2 = BIT_IOR_EXPR;
10624 swap_p = true;
10626 else
10628 bitop1 = BIT_XOR_EXPR;
10629 if (code == EQ_EXPR)
10630 bitop2 = BIT_NOT_EXPR;
10634 if (!vec_stmt)
10636 if (bitop1 == NOP_EXPR)
10638 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10639 return false;
10641 else
10643 machine_mode mode = TYPE_MODE (vectype);
10644 optab optab;
10646 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10647 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10648 return false;
10650 if (bitop2 != NOP_EXPR)
10652 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10653 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10654 return false;
10658 /* Put types on constant and invariant SLP children. */
10659 if (slp_node
10660 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10661 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10663 if (dump_enabled_p ())
10664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10665 "incompatible vector types for invariants\n");
10666 return false;
10669 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10670 vect_model_simple_cost (vinfo, stmt_info,
10671 ncopies * (1 + (bitop2 != NOP_EXPR)),
10672 dts, ndts, slp_node, cost_vec);
10673 return true;
10676 /* Transform. */
10678 /* Handle def. */
10679 lhs = gimple_assign_lhs (stmt);
10680 mask = vect_create_destination_var (lhs, mask_type);
10682 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10683 rhs1, &vec_oprnds0, vectype,
10684 rhs2, &vec_oprnds1, vectype);
10685 if (swap_p)
10686 std::swap (vec_oprnds0, vec_oprnds1);
10688 /* Arguments are ready. Create the new vector stmt. */
10689 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10691 gimple *new_stmt;
10692 vec_rhs2 = vec_oprnds1[i];
10694 new_temp = make_ssa_name (mask);
10695 if (bitop1 == NOP_EXPR)
10697 new_stmt = gimple_build_assign (new_temp, code,
10698 vec_rhs1, vec_rhs2);
10699 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10701 else
10703 if (bitop1 == BIT_NOT_EXPR)
10704 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10705 else
10706 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10707 vec_rhs2);
10708 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10709 if (bitop2 != NOP_EXPR)
10711 tree res = make_ssa_name (mask);
10712 if (bitop2 == BIT_NOT_EXPR)
10713 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10714 else
10715 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10716 new_temp);
10717 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10720 if (slp_node)
10721 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10722 else
10723 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10726 if (!slp_node)
10727 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10729 vec_oprnds0.release ();
10730 vec_oprnds1.release ();
10732 return true;
10735 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10736 can handle all live statements in the node. Otherwise return true
10737 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10738 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10740 static bool
10741 can_vectorize_live_stmts (vec_info *vinfo,
10742 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10743 slp_tree slp_node, slp_instance slp_node_instance,
10744 bool vec_stmt_p,
10745 stmt_vector_for_cost *cost_vec)
10747 if (slp_node)
10749 stmt_vec_info slp_stmt_info;
10750 unsigned int i;
10751 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10753 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10754 && !vectorizable_live_operation (vinfo,
10755 slp_stmt_info, gsi, slp_node,
10756 slp_node_instance, i,
10757 vec_stmt_p, cost_vec))
10758 return false;
10761 else if (STMT_VINFO_LIVE_P (stmt_info)
10762 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10763 slp_node, slp_node_instance, -1,
10764 vec_stmt_p, cost_vec))
10765 return false;
10767 return true;
10770 /* Make sure the statement is vectorizable. */
10772 opt_result
10773 vect_analyze_stmt (vec_info *vinfo,
10774 stmt_vec_info stmt_info, bool *need_to_vectorize,
10775 slp_tree node, slp_instance node_instance,
10776 stmt_vector_for_cost *cost_vec)
10778 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10779 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10780 bool ok;
10781 gimple_seq pattern_def_seq;
10783 if (dump_enabled_p ())
10784 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10785 stmt_info->stmt);
10787 if (gimple_has_volatile_ops (stmt_info->stmt))
10788 return opt_result::failure_at (stmt_info->stmt,
10789 "not vectorized:"
10790 " stmt has volatile operands: %G\n",
10791 stmt_info->stmt);
10793 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10794 && node == NULL
10795 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10797 gimple_stmt_iterator si;
10799 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10801 stmt_vec_info pattern_def_stmt_info
10802 = vinfo->lookup_stmt (gsi_stmt (si));
10803 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10804 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10806 /* Analyze def stmt of STMT if it's a pattern stmt. */
10807 if (dump_enabled_p ())
10808 dump_printf_loc (MSG_NOTE, vect_location,
10809 "==> examining pattern def statement: %G",
10810 pattern_def_stmt_info->stmt);
10812 opt_result res
10813 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10814 need_to_vectorize, node, node_instance,
10815 cost_vec);
10816 if (!res)
10817 return res;
10822 /* Skip stmts that do not need to be vectorized. In loops this is expected
10823 to include:
10824 - the COND_EXPR which is the loop exit condition
10825 - any LABEL_EXPRs in the loop
10826 - computations that are used only for array indexing or loop control.
10827 In basic blocks we only analyze statements that are a part of some SLP
10828 instance, therefore, all the statements are relevant.
10830 A pattern statement needs to be analyzed instead of the original statement
10831 if the original statement is not relevant. Otherwise, we analyze both
10832 statements. In basic blocks we are called from some SLP instance
10833 traversal; don't analyze pattern stmts there, since the pattern stmts
10834 are already part of an SLP instance. */
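/* As an example of the loop-control category above (a source-level
   sketch, names purely illustrative): in
     for (i = 0; i < n; i++)
       a[i] = b[i];
   the increment of i and the exit test i < n are used only for array
   indexing and loop control, so they are not vectorized themselves.  */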
10836 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10837 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10838 && !STMT_VINFO_LIVE_P (stmt_info))
10840 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10841 && pattern_stmt_info
10842 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10843 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10845 /* Analyze PATTERN_STMT instead of the original stmt. */
10846 stmt_info = pattern_stmt_info;
10847 if (dump_enabled_p ())
10848 dump_printf_loc (MSG_NOTE, vect_location,
10849 "==> examining pattern statement: %G",
10850 stmt_info->stmt);
10852 else
10854 if (dump_enabled_p ())
10855 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10857 return opt_result::success ();
10860 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10861 && node == NULL
10862 && pattern_stmt_info
10863 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10864 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10866 /* Analyze PATTERN_STMT too. */
10867 if (dump_enabled_p ())
10868 dump_printf_loc (MSG_NOTE, vect_location,
10869 "==> examining pattern statement: %G",
10870 pattern_stmt_info->stmt);
10872 opt_result res
10873 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10874 node_instance, cost_vec);
10875 if (!res)
10876 return res;
10879 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10881 case vect_internal_def:
10882 break;
10884 case vect_reduction_def:
10885 case vect_nested_cycle:
10886 gcc_assert (!bb_vinfo
10887 && (relevance == vect_used_in_outer
10888 || relevance == vect_used_in_outer_by_reduction
10889 || relevance == vect_used_by_reduction
10890 || relevance == vect_unused_in_scope
10891 || relevance == vect_used_only_live));
10892 break;
10894 case vect_induction_def:
10895 gcc_assert (!bb_vinfo);
10896 break;
10898 case vect_constant_def:
10899 case vect_external_def:
10900 case vect_unknown_def_type:
10901 default:
10902 gcc_unreachable ();
10905 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
10906 if (node)
10907 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
10909 if (STMT_VINFO_RELEVANT_P (stmt_info))
10911 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10912 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10913 || (call && gimple_call_lhs (call) == NULL_TREE));
10914 *need_to_vectorize = true;
10917 if (PURE_SLP_STMT (stmt_info) && !node)
10919 if (dump_enabled_p ())
10920 dump_printf_loc (MSG_NOTE, vect_location,
10921 "handled only by SLP analysis\n");
10922 return opt_result::success ();
10925 ok = true;
10926 if (!bb_vinfo
10927 && (STMT_VINFO_RELEVANT_P (stmt_info)
10928 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10929 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10930 -mveclibabi= takes preference over library functions with
10931 the simd attribute. */
10932 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10933 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10934 cost_vec)
10935 || vectorizable_conversion (vinfo, stmt_info,
10936 NULL, NULL, node, cost_vec)
10937 || vectorizable_operation (vinfo, stmt_info,
10938 NULL, NULL, node, cost_vec)
10939 || vectorizable_assignment (vinfo, stmt_info,
10940 NULL, NULL, node, cost_vec)
10941 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10942 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10943 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10944 node, node_instance, cost_vec)
10945 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10946 NULL, node, cost_vec)
10947 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10948 || vectorizable_condition (vinfo, stmt_info,
10949 NULL, NULL, node, cost_vec)
10950 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10951 cost_vec)
10952 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10953 stmt_info, NULL, node));
10954 else
10956 if (bb_vinfo)
10957 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10958 || vectorizable_simd_clone_call (vinfo, stmt_info,
10959 NULL, NULL, node, cost_vec)
10960 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10961 cost_vec)
10962 || vectorizable_shift (vinfo, stmt_info,
10963 NULL, NULL, node, cost_vec)
10964 || vectorizable_operation (vinfo, stmt_info,
10965 NULL, NULL, node, cost_vec)
10966 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10967 cost_vec)
10968 || vectorizable_load (vinfo, stmt_info,
10969 NULL, NULL, node, cost_vec)
10970 || vectorizable_store (vinfo, stmt_info,
10971 NULL, NULL, node, cost_vec)
10972 || vectorizable_condition (vinfo, stmt_info,
10973 NULL, NULL, node, cost_vec)
10974 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10975 cost_vec)
10976 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
10979 if (node)
10980 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
10982 if (!ok)
10983 return opt_result::failure_at (stmt_info->stmt,
10984 "not vectorized:"
10985 " relevant stmt not supported: %G",
10986 stmt_info->stmt);
10988 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
10989 need extra handling, except for vectorizable reductions. */
10990 if (!bb_vinfo
10991 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10992 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10993 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10994 stmt_info, NULL, node, node_instance,
10995 false, cost_vec))
10996 return opt_result::failure_at (stmt_info->stmt,
10997 "not vectorized:"
10998 " live stmt not supported: %G",
10999 stmt_info->stmt);
11001 return opt_result::success ();
11005 /* Function vect_transform_stmt.
11007 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11009 bool
11010 vect_transform_stmt (vec_info *vinfo,
11011 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11012 slp_tree slp_node, slp_instance slp_node_instance)
11014 bool is_store = false;
11015 gimple *vec_stmt = NULL;
11016 bool done;
11018 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11020 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11021 if (slp_node)
11022 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11024 switch (STMT_VINFO_TYPE (stmt_info))
11026 case type_demotion_vec_info_type:
11027 case type_promotion_vec_info_type:
11028 case type_conversion_vec_info_type:
11029 done = vectorizable_conversion (vinfo, stmt_info,
11030 gsi, &vec_stmt, slp_node, NULL);
11031 gcc_assert (done);
11032 break;
11034 case induc_vec_info_type:
11035 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11036 stmt_info, &vec_stmt, slp_node,
11037 NULL);
11038 gcc_assert (done);
11039 break;
11041 case shift_vec_info_type:
11042 done = vectorizable_shift (vinfo, stmt_info,
11043 gsi, &vec_stmt, slp_node, NULL);
11044 gcc_assert (done);
11045 break;
11047 case op_vec_info_type:
11048 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11049 NULL);
11050 gcc_assert (done);
11051 break;
11053 case assignment_vec_info_type:
11054 done = vectorizable_assignment (vinfo, stmt_info,
11055 gsi, &vec_stmt, slp_node, NULL);
11056 gcc_assert (done);
11057 break;
11059 case load_vec_info_type:
11060 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11061 NULL);
11062 gcc_assert (done);
11063 break;
11065 case store_vec_info_type:
11066 done = vectorizable_store (vinfo, stmt_info,
11067 gsi, &vec_stmt, slp_node, NULL);
11068 gcc_assert (done);
11069 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11071 /* In case of interleaving, the whole chain is vectorized when the
11072 last store in the chain is reached. Store stmts before the last
11073 one are skipped, and their vec_stmt_info shouldn't be freed
11074 meanwhile. */
11075 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11076 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11077 is_store = true;
11079 else
11080 is_store = true;
11081 break;
11083 case condition_vec_info_type:
11084 done = vectorizable_condition (vinfo, stmt_info,
11085 gsi, &vec_stmt, slp_node, NULL);
11086 gcc_assert (done);
11087 break;
11089 case comparison_vec_info_type:
11090 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11091 slp_node, NULL);
11092 gcc_assert (done);
11093 break;
11095 case call_vec_info_type:
11096 done = vectorizable_call (vinfo, stmt_info,
11097 gsi, &vec_stmt, slp_node, NULL);
11098 break;
11100 case call_simd_clone_vec_info_type:
11101 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11102 slp_node, NULL);
11103 break;
11105 case reduc_vec_info_type:
11106 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11107 gsi, &vec_stmt, slp_node);
11108 gcc_assert (done);
11109 break;
11111 case cycle_phi_info_type:
11112 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11113 &vec_stmt, slp_node, slp_node_instance);
11114 gcc_assert (done);
11115 break;
11117 case lc_phi_info_type:
11118 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11119 stmt_info, &vec_stmt, slp_node);
11120 gcc_assert (done);
11121 break;
11123 case phi_info_type:
11124 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11125 gcc_assert (done);
11126 break;
11128 default:
11129 if (!STMT_VINFO_LIVE_P (stmt_info))
11131 if (dump_enabled_p ())
11132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11133 "stmt not supported.\n");
11134 gcc_unreachable ();
11136 done = true;
11139 if (!slp_node && vec_stmt)
11140 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11142 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11144 /* Handle stmts whose DEF is used outside the loop-nest that is
11145 being vectorized. */
11146 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11147 slp_node_instance, true, NULL);
11148 gcc_assert (done);
11151 if (slp_node)
11152 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11154 return is_store;
11158 /* Remove a group of stores (for SLP or interleaving), free their
11159 stmt_vec_info. */
11161 void
11162 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11164 stmt_vec_info next_stmt_info = first_stmt_info;
11166 while (next_stmt_info)
11168 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11169 next_stmt_info = vect_orig_stmt (next_stmt_info);
11170 /* Free the attached stmt_vec_info and remove the stmt. */
11171 vinfo->remove_stmt (next_stmt_info);
11172 next_stmt_info = tmp;
11176 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11177 elements of type SCALAR_TYPE, or null if the target doesn't support
11178 such a type.
11180 If NUNITS is zero, return a vector type that contains elements of
11181 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11183 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11184 for this vectorization region and want to "autodetect" the best choice.
11185 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11186 and we want the new type to be interoperable with it. PREVAILING_MODE
11187 in this case can be a scalar integer mode or a vector mode; when it
11188 is a vector mode, the function acts like a tree-level version of
11189 related_vector_mode. */
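/* Rough usage sketch (the resulting mode is a target-dependent
   assumption): with a PREVAILING_MODE of V16QImode, SCALAR_TYPE 'int'
   and NUNITS == 0, this would typically go through related_vector_mode
   and return a 4-lane integer vector type (V4SImode or similar).  */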
11191 tree
11192 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11193 tree scalar_type, poly_uint64 nunits)
11195 tree orig_scalar_type = scalar_type;
11196 scalar_mode inner_mode;
11197 machine_mode simd_mode;
11198 tree vectype;
11200 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11201 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11202 return NULL_TREE;
11204 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11206 /* For vector types of elements whose mode precision doesn't
11207 match their type's precision we use an element type of mode
11208 precision. The vectorization routines will have to make sure
11209 they support the proper result truncation/extension.
11210 We also make sure to build vector types with INTEGER_TYPE
11211 component type only. */
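/* For instance, a boolean type with TYPE_PRECISION 1 but QImode is
   replaced here by an unsigned 8-bit INTEGER_TYPE (a sketch of the
   common case; the component mode is target-dependent).  */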
11212 if (INTEGRAL_TYPE_P (scalar_type)
11213 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11214 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11215 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11216 TYPE_UNSIGNED (scalar_type));
11218 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11219 When the component mode passes the above test simply use a type
11220 corresponding to that mode. The theory is that any use that
11221 would cause problems with this will disable vectorization anyway. */
11222 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11223 && !INTEGRAL_TYPE_P (scalar_type))
11224 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11226 /* We can't build a vector type of elements with alignment bigger than
11227 their size. */
11228 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11229 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11230 TYPE_UNSIGNED (scalar_type));
11232 /* If we fell back to using the mode, fail if there was
11233 no scalar type for it. */
11234 if (scalar_type == NULL_TREE)
11235 return NULL_TREE;
11237 /* If no prevailing mode was supplied, use the mode the target prefers.
11238 Otherwise lookup a vector mode based on the prevailing mode. */
11239 if (prevailing_mode == VOIDmode)
11241 gcc_assert (known_eq (nunits, 0U));
11242 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11243 if (SCALAR_INT_MODE_P (simd_mode))
11245 /* Traditional behavior is not to take the integer mode
11246 literally, but simply to use it as a way of determining
11247 the vector size. It is up to mode_for_vector to decide
11248 what the TYPE_MODE should be.
11250 Note that nunits == 1 is allowed in order to support single
11251 element vector types. */
11252 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11253 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11254 return NULL_TREE;
11257 else if (SCALAR_INT_MODE_P (prevailing_mode)
11258 || !related_vector_mode (prevailing_mode,
11259 inner_mode, nunits).exists (&simd_mode))
11261 /* Fall back to using mode_for_vector, mostly in the hope of being
11262 able to use an integer mode. */
11263 if (known_eq (nunits, 0U)
11264 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11265 return NULL_TREE;
11267 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11268 return NULL_TREE;
11271 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11273 /* In cases where the mode was chosen by mode_for_vector, check that
11274 the target actually supports the chosen mode, or that it at least
11275 allows the vector mode to be replaced by a like-sized integer. */
11276 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11277 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11278 return NULL_TREE;
11280 /* Re-attach the address-space qualifier if we canonicalized the scalar
11281 type. */
11282 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11283 return build_qualified_type
11284 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11286 return vectype;
11289 /* Function get_vectype_for_scalar_type.
11291 Returns the vector type corresponding to SCALAR_TYPE as supported
11292 by the target. If GROUP_SIZE is nonzero and we're performing BB
11293 vectorization, make sure that the number of elements in the vector
11294 is no bigger than GROUP_SIZE. */
11296 tree
11297 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11298 unsigned int group_size)
11300 /* For BB vectorization, we should always have a group size once we've
11301 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11302 are tentative requests during things like early data reference
11303 analysis and pattern recognition. */
11304 if (is_a <bb_vec_info> (vinfo))
11305 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11306 else
11307 group_size = 0;
11309 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11310 scalar_type);
11311 if (vectype && vinfo->vector_mode == VOIDmode)
11312 vinfo->vector_mode = TYPE_MODE (vectype);
11314 /* Register the natural choice of vector type, before the group size
11315 has been applied. */
11316 if (vectype)
11317 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11319 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11320 try again with an explicit number of elements. */
11321 if (vectype
11322 && group_size
11323 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11325 /* Start with the biggest number of units that fits within
11326 GROUP_SIZE and halve it until we find a valid vector type.
11327 Usually either the first attempt will succeed or all will
11328 fail (in the latter case because GROUP_SIZE is too small
11329 for the target), but it's possible that a target could have
11330 a hole between supported vector types.
11332 If GROUP_SIZE is not a power of 2, this has the effect of
11333 trying the largest power of 2 that fits within the group,
11334 even though the group is not a multiple of that vector size.
11335 The BB vectorizer will then try to carve up the group into
11336 smaller pieces. */
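/* Illustrative sketch (assuming the natural choice for 'int' is a
   4-lane vector): with GROUP_SIZE == 3 we start from nunits == 2, the
   largest power of 2 not exceeding 3, and return a 2-lane vector type
   if the target supports one; the loop below stops halving once nunits
   reaches 1.  */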
11337 unsigned int nunits = 1 << floor_log2 (group_size);
11340 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11341 scalar_type, nunits);
11342 nunits /= 2;
11344 while (nunits > 1 && !vectype);
11347 return vectype;
11350 /* Return the vector type corresponding to SCALAR_TYPE as supported
11351 by the target. NODE, if nonnull, is the SLP tree node that will
11352 use the returned vector type. */
11354 tree
11355 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11357 unsigned int group_size = 0;
11358 if (node)
11359 group_size = SLP_TREE_LANES (node);
11360 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11363 /* Function get_mask_type_for_scalar_type.
11365 Returns the mask type corresponding to a result of comparison
11366 of vectors of specified SCALAR_TYPE as supported by target.
11367 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11368 make sure that the number of elements in the vector is no bigger
11369 than GROUP_SIZE. */
11371 tree
11372 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11373 unsigned int group_size)
11375 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11377 if (!vectype)
11378 return NULL;
11380 return truth_type_for (vectype);
11383 /* Function get_same_sized_vectype
11385 Returns a vector type corresponding to SCALAR_TYPE of size
11386 VECTOR_TYPE if supported by the target. */
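/* Usage sketch (sizes assume a 16-byte VECTOR_TYPE): for SCALAR_TYPE
   'short' (2 bytes) and a VECTOR_TYPE of four 4-byte ints, nunits is
   16 / 2 = 8, so we ask for a related 8-lane vector of shorts
   (V8HImode on typical 128-bit targets).  */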
11388 tree
11389 get_same_sized_vectype (tree scalar_type, tree vector_type)
11391 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11392 return truth_type_for (vector_type);
11394 poly_uint64 nunits;
11395 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11396 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11397 return NULL_TREE;
11399 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11400 scalar_type, nunits);
11403 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11404 would not change the chosen vector modes. */
11406 bool
11407 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11409 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11410 i != vinfo->used_vector_modes.end (); ++i)
11411 if (!VECTOR_MODE_P (*i)
11412 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11413 return false;
11414 return true;
11417 /* Function vect_is_simple_use.
11419 Input:
11420 VINFO - the vect info of the loop or basic block that is being vectorized.
11421 OPERAND - operand in the loop or bb.
11422 Output:
11423 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11424 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11425 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11426 the definition could be anywhere in the function
11427 DT - the type of definition
11429 Returns whether a stmt with OPERAND can be vectorized.
11430 For loops, supportable operands are constants, loop invariants, and operands
11431 that are defined by the current iteration of the loop. Unsupportable
11432 operands are those that are defined by a previous iteration of the loop (as
11433 is the case in reduction/induction computations).
11434 For basic blocks, supportable operands are constants and bb invariants.
11435 For now, operands defined outside the basic block are not supported. */
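/* Classification sketch (names illustrative only): in a loop like
     for (i = 0; i < n; i++)
       a[i] = b[i] * c + 3;
   the SSA name holding the loaded b[i] is vect_internal_def, the
   loop-invariant 'c' defined before the loop is vect_external_def and
   the literal 3 is vect_constant_def.  */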
11437 bool
11438 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11439 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11441 if (def_stmt_info_out)
11442 *def_stmt_info_out = NULL;
11443 if (def_stmt_out)
11444 *def_stmt_out = NULL;
11445 *dt = vect_unknown_def_type;
11447 if (dump_enabled_p ())
11449 dump_printf_loc (MSG_NOTE, vect_location,
11450 "vect_is_simple_use: operand ");
11451 if (TREE_CODE (operand) == SSA_NAME
11452 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11453 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11454 else
11455 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11458 if (CONSTANT_CLASS_P (operand))
11459 *dt = vect_constant_def;
11460 else if (is_gimple_min_invariant (operand))
11461 *dt = vect_external_def;
11462 else if (TREE_CODE (operand) != SSA_NAME)
11463 *dt = vect_unknown_def_type;
11464 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11465 *dt = vect_external_def;
11466 else
11468 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11469 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11470 if (!stmt_vinfo)
11471 *dt = vect_external_def;
11472 else
11474 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11475 def_stmt = stmt_vinfo->stmt;
11476 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11477 if (def_stmt_info_out)
11478 *def_stmt_info_out = stmt_vinfo;
11480 if (def_stmt_out)
11481 *def_stmt_out = def_stmt;
11484 if (dump_enabled_p ())
11486 dump_printf (MSG_NOTE, ", type of def: ");
11487 switch (*dt)
11489 case vect_uninitialized_def:
11490 dump_printf (MSG_NOTE, "uninitialized\n");
11491 break;
11492 case vect_constant_def:
11493 dump_printf (MSG_NOTE, "constant\n");
11494 break;
11495 case vect_external_def:
11496 dump_printf (MSG_NOTE, "external\n");
11497 break;
11498 case vect_internal_def:
11499 dump_printf (MSG_NOTE, "internal\n");
11500 break;
11501 case vect_induction_def:
11502 dump_printf (MSG_NOTE, "induction\n");
11503 break;
11504 case vect_reduction_def:
11505 dump_printf (MSG_NOTE, "reduction\n");
11506 break;
11507 case vect_double_reduction_def:
11508 dump_printf (MSG_NOTE, "double reduction\n");
11509 break;
11510 case vect_nested_cycle:
11511 dump_printf (MSG_NOTE, "nested cycle\n");
11512 break;
11513 case vect_unknown_def_type:
11514 dump_printf (MSG_NOTE, "unknown\n");
11515 break;
11519 if (*dt == vect_unknown_def_type)
11521 if (dump_enabled_p ())
11522 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11523 "Unsupported pattern.\n");
11524 return false;
11527 return true;
11530 /* Function vect_is_simple_use.
11532 Same as vect_is_simple_use but also determines the vector operand
11533 type of OPERAND and stores it to *VECTYPE. If the definition of
11534 OPERAND is vect_uninitialized_def, vect_constant_def or
11535 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11536 is responsible to compute the best suited vector type for the
11537 scalar operand. */
11539 bool
11540 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11541 tree *vectype, stmt_vec_info *def_stmt_info_out,
11542 gimple **def_stmt_out)
11544 stmt_vec_info def_stmt_info;
11545 gimple *def_stmt;
11546 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11547 return false;
11549 if (def_stmt_out)
11550 *def_stmt_out = def_stmt;
11551 if (def_stmt_info_out)
11552 *def_stmt_info_out = def_stmt_info;
11554 /* Now get a vector type if the def is internal, otherwise supply
11555 NULL_TREE and leave it up to the caller to figure out a proper
11556 type for the use stmt. */
11557 if (*dt == vect_internal_def
11558 || *dt == vect_induction_def
11559 || *dt == vect_reduction_def
11560 || *dt == vect_double_reduction_def
11561 || *dt == vect_nested_cycle)
11563 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11564 gcc_assert (*vectype != NULL_TREE);
11565 if (dump_enabled_p ())
11566 dump_printf_loc (MSG_NOTE, vect_location,
11567 "vect_is_simple_use: vectype %T\n", *vectype);
11569 else if (*dt == vect_uninitialized_def
11570 || *dt == vect_constant_def
11571 || *dt == vect_external_def)
11572 *vectype = NULL_TREE;
11573 else
11574 gcc_unreachable ();
11576 return true;
11579 /* Function vect_is_simple_use.
11581 Same as vect_is_simple_use but determines the operand by operand
11582 position OPERAND from either STMT or SLP_NODE, filling in *OP
11583 and *SLP_DEF (when SLP_NODE is not NULL). */
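/* Operand numbering sketch for the non-SLP path below: for a statement
   like x = a < b ? c : d, operand 0 is a, operand 1 is b, operand 2 is
   c and operand 3 is d; the embedded comparison supplies the first two
   operands.  */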
11585 bool
11586 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11587 unsigned operand, tree *op, slp_tree *slp_def,
11588 enum vect_def_type *dt,
11589 tree *vectype, stmt_vec_info *def_stmt_info_out)
11591 if (slp_node)
11593 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11594 *slp_def = child;
11595 *vectype = SLP_TREE_VECTYPE (child);
11596 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11598 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11599 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11601 else
11603 if (def_stmt_info_out)
11604 *def_stmt_info_out = NULL;
11605 *op = SLP_TREE_SCALAR_OPS (child)[0];
11606 *dt = SLP_TREE_DEF_TYPE (child);
11607 return true;
11610 else
11612 *slp_def = NULL;
11613 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11615 if (gimple_assign_rhs_code (ass) == COND_EXPR
11616 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11618 if (operand < 2)
11619 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11620 else
11621 *op = gimple_op (ass, operand);
11623 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11624 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11625 else
11626 *op = gimple_op (ass, operand + 1);
11628 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11629 *op = gimple_call_arg (call, operand);
11630 else
11631 gcc_unreachable ();
11632 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11636 /* If OP is not NULL and is external or constant update its vector
11637 type with VECTYPE. Returns true if successful or false if not,
11638 for example when conflicting vector types are present. */
11640 bool
11641 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11643 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11644 return true;
11645 if (SLP_TREE_VECTYPE (op))
11646 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11647 SLP_TREE_VECTYPE (op) = vectype;
11648 return true;
11651 /* Function supportable_widening_operation
11653 Check whether an operation represented by the code CODE is a
11654 widening operation that is supported by the target platform in
11655 vector form (i.e., when operating on arguments of type VECTYPE_IN
11656 producing a result of type VECTYPE_OUT).
11658 Widening operations we currently support are NOP (CONVERT), FLOAT,
11659 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11660 are supported by the target platform either directly (via vector
11661 tree-codes), or via target builtins.
11663 Output:
11664 - CODE1 and CODE2 are codes of vector operations to be used when
11665 vectorizing the operation, if available.
11666 - MULTI_STEP_CVT determines the number of required intermediate steps in
11667 case of multi-step conversion (like char->short->int - in that case
11668 MULTI_STEP_CVT will be 1).
11669 - INTERM_TYPES contains the intermediate type required to perform the
11670 widening operation (short in the above example). */
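/* Result sketch for a char -> int conversion (assuming a 128-bit
   target that provides the unpack optabs): *CODE1/*CODE2 become the
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR pair, *MULTI_STEP_CVT becomes
   1 and *INTERM_TYPES holds the intermediate short vector type, so the
   widening is performed as char -> short -> int.  */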
11672 bool
11673 supportable_widening_operation (vec_info *vinfo,
11674 enum tree_code code, stmt_vec_info stmt_info,
11675 tree vectype_out, tree vectype_in,
11676 enum tree_code *code1, enum tree_code *code2,
11677 int *multi_step_cvt,
11678 vec<tree> *interm_types)
11680 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11681 class loop *vect_loop = NULL;
11682 machine_mode vec_mode;
11683 enum insn_code icode1, icode2;
11684 optab optab1, optab2;
11685 tree vectype = vectype_in;
11686 tree wide_vectype = vectype_out;
11687 enum tree_code c1, c2;
11688 int i;
11689 tree prev_type, intermediate_type;
11690 machine_mode intermediate_mode, prev_mode;
11691 optab optab3, optab4;
11693 *multi_step_cvt = 0;
11694 if (loop_info)
11695 vect_loop = LOOP_VINFO_LOOP (loop_info);
11697 switch (code)
11699 case WIDEN_MULT_EXPR:
11700 /* The result of a vectorized widening operation usually requires
11701 two vectors (because the widened results do not fit into one vector).
11702 The generated vector results would normally be expected to be
11703 generated in the same order as in the original scalar computation,
11704 i.e. if 8 results are generated in each vector iteration, they are
11705 to be organized as follows:
11706 vect1: [res1,res2,res3,res4],
11707 vect2: [res5,res6,res7,res8].
11709 However, in the special case that the result of the widening
11710 operation is used in a reduction computation only, the order doesn't
11711 matter (because when vectorizing a reduction we change the order of
11712 the computation). Some targets can take advantage of this and
11713 generate more efficient code. For example, targets like Altivec,
11714 that support widen_mult using a sequence of {mult_even,mult_odd}
11715 generate the following vectors:
11716 vect1: [res1,res3,res5,res7],
11717 vect2: [res2,res4,res6,res8].
11719 When vectorizing outer-loops, we execute the inner-loop sequentially
11720 (each vectorized inner-loop iteration contributes to VF outer-loop
11721 iterations in parallel). We therefore don't allow changing the
11722 order of the computation in the inner-loop during outer-loop
11723 vectorization. */
11724 /* TODO: Another case in which order doesn't *really* matter is when we
11725 widen and then contract again, e.g. (short)((int)x * y >> 8).
11726 Normally, pack_trunc performs an even/odd permute, whereas the
11727 repack from an even/odd expansion would be an interleave, which
11728 would be significantly simpler for e.g. AVX2. */
11729 /* In any case, in order to avoid duplicating the code below, recurse
11730 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11731 are properly set up for the caller. If we fail, we'll continue with
11732 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11733 if (vect_loop
11734 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11735 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11736 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11737 stmt_info, vectype_out,
11738 vectype_in, code1, code2,
11739 multi_step_cvt, interm_types))
11741 /* Elements in a vector with vect_used_by_reduction property cannot
11742 be reordered if the use chain with this property does not have the
11743 same operation. One such example is s += a * b, where elements
11744 in a and b cannot be reordered. Here we check if the vector defined
11745 by STMT is only directly used in the reduction statement. */
11746 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11747 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11748 if (use_stmt_info
11749 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11750 return true;
11752 c1 = VEC_WIDEN_MULT_LO_EXPR;
11753 c2 = VEC_WIDEN_MULT_HI_EXPR;
11754 break;
11756 case DOT_PROD_EXPR:
11757 c1 = DOT_PROD_EXPR;
11758 c2 = DOT_PROD_EXPR;
11759 break;
11761 case SAD_EXPR:
11762 c1 = SAD_EXPR;
11763 c2 = SAD_EXPR;
11764 break;
11766 case VEC_WIDEN_MULT_EVEN_EXPR:
11767 /* Support the recursion induced just above. */
11768 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11769 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11770 break;
11772 case WIDEN_LSHIFT_EXPR:
11773 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11774 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11775 break;
11777 case WIDEN_PLUS_EXPR:
11778 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11779 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11780 break;
11782 case WIDEN_MINUS_EXPR:
11783 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11784 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11785 break;
11787 CASE_CONVERT:
11788 c1 = VEC_UNPACK_LO_EXPR;
11789 c2 = VEC_UNPACK_HI_EXPR;
11790 break;
11792 case FLOAT_EXPR:
11793 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11794 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11795 break;
11797 case FIX_TRUNC_EXPR:
11798 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11799 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11800 break;
11802 default:
11803 gcc_unreachable ();
11806 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11807 std::swap (c1, c2);
11809 if (code == FIX_TRUNC_EXPR)
11811 /* The signedness is determined from output operand. */
11812 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11813 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11815 else if (CONVERT_EXPR_CODE_P (code)
11816 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11817 && VECTOR_BOOLEAN_TYPE_P (vectype)
11818 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11819 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11821 /* If the input and result modes are the same, a different optab
11822 is needed where we pass in the number of units in vectype. */
11823 optab1 = vec_unpacks_sbool_lo_optab;
11824 optab2 = vec_unpacks_sbool_hi_optab;
11826 else
11828 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11829 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11832 if (!optab1 || !optab2)
11833 return false;
11835 vec_mode = TYPE_MODE (vectype);
11836 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11837 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11838 return false;
11840 *code1 = c1;
11841 *code2 = c2;
11843 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11844 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11846 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11847 return true;
11848 /* For scalar masks we may have different boolean
11849 vector types having the same QImode. Thus we
11850 add an additional check on the number of elements. */
11851 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11852 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11853 return true;
11856 /* Check if it's a multi-step conversion that can be done using intermediate
11857 types. */
11859 prev_type = vectype;
11860 prev_mode = vec_mode;
11862 if (!CONVERT_EXPR_CODE_P (code))
11863 return false;
11865 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11866 intermediate steps in the promotion sequence. We try
11867 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11868 not. */
11869 interm_types->create (MAX_INTERM_CVT_STEPS);
11870 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11872 intermediate_mode = insn_data[icode1].operand[0].mode;
11873 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11874 intermediate_type
11875 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11876 else
11877 intermediate_type
11878 = lang_hooks.types.type_for_mode (intermediate_mode,
11879 TYPE_UNSIGNED (prev_type));
11881 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11882 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11883 && intermediate_mode == prev_mode
11884 && SCALAR_INT_MODE_P (prev_mode))
11886 /* If the input and result modes are the same, a different optab
11887 is needed where we pass in the number of units in vectype. */
11888 optab3 = vec_unpacks_sbool_lo_optab;
11889 optab4 = vec_unpacks_sbool_hi_optab;
11891 else
11893 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11894 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11897 if (!optab3 || !optab4
11898 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11899 || insn_data[icode1].operand[0].mode != intermediate_mode
11900 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11901 || insn_data[icode2].operand[0].mode != intermediate_mode
11902 || ((icode1 = optab_handler (optab3, intermediate_mode))
11903 == CODE_FOR_nothing)
11904 || ((icode2 = optab_handler (optab4, intermediate_mode))
11905 == CODE_FOR_nothing))
11906 break;
11908 interm_types->quick_push (intermediate_type);
11909 (*multi_step_cvt)++;
11911 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11912 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11914 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11915 return true;
11916 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11917 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11918 return true;
11921 prev_type = intermediate_type;
11922 prev_mode = intermediate_mode;
11925 interm_types->release ();
11926 return false;
11930 /* Function supportable_narrowing_operation
11932 Check whether an operation represented by the code CODE is a
11933 narrowing operation that is supported by the target platform in
11934 vector form (i.e., when operating on arguments of type VECTYPE_IN
11935 and producing a result of type VECTYPE_OUT).
11937 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11938 and FLOAT. This function checks if these operations are supported by
11939 the target platform directly via vector tree-codes.
11941 Output:
11942 - CODE1 is the code of a vector operation to be used when
11943 vectorizing the operation, if available.
11944 - MULTI_STEP_CVT determines the number of required intermediate steps in
11945 case of multi-step conversion (like int->short->char - in that case
11946 MULTI_STEP_CVT will be 1).
11947 - INTERM_TYPES contains the intermediate type required to perform the
11948 narrowing operation (short in the above example). */
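/* Result sketch for an int -> char conversion (assuming a 128-bit
   target with the vec_pack_trunc optabs): *CODE1 becomes
   VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT becomes 1 and *INTERM_TYPES
   holds the intermediate short vector type, i.e. the narrowing is
   performed as int -> short -> char.  */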
11950 bool
11951 supportable_narrowing_operation (enum tree_code code,
11952 tree vectype_out, tree vectype_in,
11953 enum tree_code *code1, int *multi_step_cvt,
11954 vec<tree> *interm_types)
11956 machine_mode vec_mode;
11957 enum insn_code icode1;
11958 optab optab1, interm_optab;
11959 tree vectype = vectype_in;
11960 tree narrow_vectype = vectype_out;
11961 enum tree_code c1;
11962 tree intermediate_type, prev_type;
11963 machine_mode intermediate_mode, prev_mode;
11964 int i;
11965 bool uns;
11967 *multi_step_cvt = 0;
11968 switch (code)
11970 CASE_CONVERT:
11971 c1 = VEC_PACK_TRUNC_EXPR;
11972 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11973 && VECTOR_BOOLEAN_TYPE_P (vectype)
11974 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11975 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11976 optab1 = vec_pack_sbool_trunc_optab;
11977 else
11978 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11979 break;
11981 case FIX_TRUNC_EXPR:
11982 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11983 /* The signedness is determined from output operand. */
11984 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11985 break;
11987 case FLOAT_EXPR:
11988 c1 = VEC_PACK_FLOAT_EXPR;
11989 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11990 break;
11992 default:
11993 gcc_unreachable ();
11996 if (!optab1)
11997 return false;
11999 vec_mode = TYPE_MODE (vectype);
12000 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12001 return false;
12003 *code1 = c1;
12005 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12007 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12008 return true;
12009 /* For scalar masks we may have different boolean
12010 vector types having the same QImode. Thus we
12011 add an additional check on the number of elements. */
12012 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12013 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12014 return true;
12017 if (code == FLOAT_EXPR)
12018 return false;
12020 /* Check if it's a multi-step conversion that can be done using intermediate
12021 types. */
12022 prev_mode = vec_mode;
12023 prev_type = vectype;
12024 if (code == FIX_TRUNC_EXPR)
12025 uns = TYPE_UNSIGNED (vectype_out);
12026 else
12027 uns = TYPE_UNSIGNED (vectype);
12029 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12030 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12031 costly than signed. */
12032 if (code == FIX_TRUNC_EXPR && uns)
12034 enum insn_code icode2;
12036 intermediate_type
12037 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12038 interm_optab
12039 = optab_for_tree_code (c1, intermediate_type, optab_default);
12040 if (interm_optab != unknown_optab
12041 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12042 && insn_data[icode1].operand[0].mode
12043 == insn_data[icode2].operand[0].mode)
12045 uns = false;
12046 optab1 = interm_optab;
12047 icode1 = icode2;
12051 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12052 intermediate steps in the narrowing sequence. We try
12053 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12054 interm_types->create (MAX_INTERM_CVT_STEPS);
12055 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12057 intermediate_mode = insn_data[icode1].operand[0].mode;
12058 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12059 intermediate_type
12060 = vect_double_mask_nunits (prev_type, intermediate_mode);
12061 else
12062 intermediate_type
12063 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12064 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12065 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12066 && intermediate_mode == prev_mode
12067 && SCALAR_INT_MODE_P (prev_mode))
12068 interm_optab = vec_pack_sbool_trunc_optab;
12069 else
12070 interm_optab
12071 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12072 optab_default);
12073 if (!interm_optab
12074 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12075 || insn_data[icode1].operand[0].mode != intermediate_mode
12076 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12077 == CODE_FOR_nothing))
12078 break;
12080 interm_types->quick_push (intermediate_type);
12081 (*multi_step_cvt)++;
12083 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12085 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12086 return true;
12087 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12088 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12089 return true;
12092 prev_mode = intermediate_mode;
12093 prev_type = intermediate_type;
12094 optab1 = interm_optab;
12097 interm_types->release ();
12098 return false;
12101 /* Generate and return a vector mask of MASK_TYPE such that
12102 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12103 Add the statements to SEQ. */
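/* Worked example (a 4-lane mask, values chosen for illustration): with
   START_INDEX 6 and END_INDEX 9 the IFN_WHILE_ULT call built below
   yields the mask { true, true, true, false }, since 6, 7 and 8 are
   below 9 but 9 is not.  */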
12105 tree
12106 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12107 tree end_index, const char *name)
12109 tree cmp_type = TREE_TYPE (start_index);
12110 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12111 cmp_type, mask_type,
12112 OPTIMIZE_FOR_SPEED));
12113 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12114 start_index, end_index,
12115 build_zero_cst (mask_type));
12116 tree tmp;
12117 if (name)
12118 tmp = make_temp_ssa_name (mask_type, NULL, name);
12119 else
12120 tmp = make_ssa_name (mask_type);
12121 gimple_call_set_lhs (call, tmp);
12122 gimple_seq_add_stmt (seq, call);
12123 return tmp;
12126 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12127 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12129 tree
12130 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12131 tree end_index)
12133 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12134 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12137 /* Try to compute the vector types required to vectorize STMT_INFO,
12138 returning true on success and false if vectorization isn't possible.
12139 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12140 make sure that the number of elements in the vectors is no bigger
12141 than GROUP_SIZE.
12143 On success:
12145 - Set *STMT_VECTYPE_OUT to:
12146 - NULL_TREE if the statement doesn't need to be vectorized;
12147 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12149 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12150 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12151 statement does not help to determine the overall number of units. */
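/* Sketch of the two outputs (vector modes assume a 128-bit target):
   for a widening statement like int_res = (int) short_var,
   *STMT_VECTYPE_OUT is the 4-lane int vector type taken from the lhs,
   while *NUNITS_VECTYPE_OUT is the 8-lane short vector type derived
   from the smallest scalar type, so this statement contributes 8 units
   to the vectorization factor computation.  */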
12153 opt_result
12154 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12155 tree *stmt_vectype_out,
12156 tree *nunits_vectype_out,
12157 unsigned int group_size)
12159 gimple *stmt = stmt_info->stmt;
12161 /* For BB vectorization, we should always have a group size once we've
12162 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12163 are tentative requests during things like early data reference
12164 analysis and pattern recognition. */
12165 if (is_a <bb_vec_info> (vinfo))
12166 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12167 else
12168 group_size = 0;
12170 *stmt_vectype_out = NULL_TREE;
12171 *nunits_vectype_out = NULL_TREE;
12173 if (gimple_get_lhs (stmt) == NULL_TREE
12174 /* MASK_STORE has no lhs, but is ok. */
12175 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12177 if (is_a <gcall *> (stmt))
12179 /* Ignore calls with no lhs. These must be calls to
12180 #pragma omp simd functions, and what vectorization factor
12181 it really needs can't be determined until
12182 vectorizable_simd_clone_call. */
12183 if (dump_enabled_p ())
12184 dump_printf_loc (MSG_NOTE, vect_location,
12185 "defer to SIMD clone analysis.\n");
12186 return opt_result::success ();
12189 return opt_result::failure_at (stmt,
12190 "not vectorized: irregular stmt.%G", stmt);
12193 tree vectype;
12194 tree scalar_type = NULL_TREE;
12195 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12197 vectype = STMT_VINFO_VECTYPE (stmt_info);
12198 if (dump_enabled_p ())
12199 dump_printf_loc (MSG_NOTE, vect_location,
12200 "precomputed vectype: %T\n", vectype);
12202 else if (vect_use_mask_type_p (stmt_info))
12204 unsigned int precision = stmt_info->mask_precision;
12205 scalar_type = build_nonstandard_integer_type (precision, 1);
12206 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12207 if (!vectype)
12208 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12209 " data-type %T\n", scalar_type);
12210 if (dump_enabled_p ())
12211 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12213 else
12215 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12216 scalar_type = TREE_TYPE (DR_REF (dr));
12217 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12218 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12219 else
12220 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12222 if (dump_enabled_p ())
12224 if (group_size)
12225 dump_printf_loc (MSG_NOTE, vect_location,
12226 "get vectype for scalar type (group size %d):"
12227 " %T\n", group_size, scalar_type);
12228 else
12229 dump_printf_loc (MSG_NOTE, vect_location,
12230 "get vectype for scalar type: %T\n", scalar_type);
12232 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12233 if (!vectype)
12234 return opt_result::failure_at (stmt,
12235 "not vectorized:"
12236 " unsupported data-type %T\n",
12237 scalar_type);
12239 if (dump_enabled_p ())
12240 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12243 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12244 return opt_result::failure_at (stmt,
12245 "not vectorized: vector stmt in loop:%G",
12246 stmt);
12248 *stmt_vectype_out = vectype;
12250 /* Don't try to compute scalar types if the stmt produces a boolean
12251 vector; use the existing vector type instead. */
12252 tree nunits_vectype = vectype;
12253 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12255 /* The number of units is set according to the smallest scalar
12256 type (or the largest vector size, but we only support one
12257 vector size per vectorization). */
12258 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12259 TREE_TYPE (vectype));
12260 if (scalar_type != TREE_TYPE (vectype))
12262 if (dump_enabled_p ())
12263 dump_printf_loc (MSG_NOTE, vect_location,
12264 "get vectype for smallest scalar type: %T\n",
12265 scalar_type);
12266 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12267 group_size);
12268 if (!nunits_vectype)
12269 return opt_result::failure_at
12270 (stmt, "not vectorized: unsupported data-type %T\n",
12271 scalar_type);
12272 if (dump_enabled_p ())
12273 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12274 nunits_vectype);
12278 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12279 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12280 return opt_result::failure_at (stmt,
12281 "Not vectorized: Incompatible number "
12282 "of vector subparts between %T and %T\n",
12283 nunits_vectype, *stmt_vectype_out);
12285 if (dump_enabled_p ())
12287 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12288 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12289 dump_printf (MSG_NOTE, "\n");
12292 *nunits_vectype_out = nunits_vectype;
12293 return opt_result::success ();
12296 /* Generate and return statement sequence that sets vector length LEN that is:
12298 min_of_start_and_end = min (START_INDEX, END_INDEX);
12299 left_len = END_INDEX - min_of_start_and_end;
12300 rhs = min (left_len, LEN_LIMIT);
12301 LEN = rhs;
12303 Note: the cost of the code generated by this function is modeled
12304 by vect_estimate_min_profitable_iters, so changes here may need
12305 corresponding changes there. */
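/* Worked example (numbers chosen for illustration, LEN_LIMIT == 4):
   with START_INDEX 3 and END_INDEX 10 this computes
   LEN = MIN (10 - MIN (3, 10), 4) = 4; with START_INDEX 8 it computes
   MIN (2, 4) = 2; and once START_INDEX reaches END_INDEX the result
   is 0.  */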
12307 gimple_seq
12308 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12310 gimple_seq stmts = NULL;
12311 tree len_type = TREE_TYPE (len);
12312 gcc_assert (TREE_TYPE (start_index) == len_type);
12314 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12315 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12316 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12317 gimple* stmt = gimple_build_assign (len, rhs);
12318 gimple_seq_add_stmt (&stmts, stmt);
12320 return stmts;