gcc/tree-vect-stmts.c
blob 65e30bac4249d6a5fb18e4713556b4cee629a0e7
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
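/* A minimal, self-contained model of the deferred costing scheme used by
   record_stmt_cost above: each (count, kind) pair is pushed onto a vector
   for later processing and a preliminary estimate of count times the
   per-statement cost is returned.  The names cost_kind, cost_entry and
   per_stmt_cost are illustrative stand-ins for vect_cost_for_stmt,
   stmt_info_for_cost and the target's builtin_vectorization_cost hook.  */

#include <vector>

enum cost_kind { scalar_stmt_k, vector_stmt_k, vector_load_k, vector_store_k };

struct cost_entry { int count; cost_kind kind; };

static int
per_stmt_cost (cost_kind kind)
{
  /* Made-up per-statement costs; a real target hook decides these.  */
  return (kind == vector_load_k || kind == vector_store_k) ? 2 : 1;
}

static unsigned
record_cost (std::vector<cost_entry> &body_costs, int count, cost_kind kind)
{
  body_costs.push_back ({ count, kind });	      /* defer the real costing */
  return (unsigned) (per_stmt_cost (kind) * count);   /* preliminary estimate */
}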
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
158 tree array_ref;
159 gimple *new_stmt;
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 tree mem_ref;
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
249 worklist->safe_push (stmt_info);
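/* A self-contained sketch of the update performed above: relevance only
   ever increases, liveness is OR-ed in, and the statement is re-queued
   only when something actually changed.  stmt_node and relevance below
   are simplified stand-ins for stmt_vec_info and enum vect_relevant.  */

#include <vector>

enum relevance { unused_in_scope = 0, used_only_live, used_by_reduction, used_in_scope };

struct stmt_node
{
  relevance relevant = unused_in_scope;
  bool live = false;
};

static void
mark_relevant (std::vector<stmt_node *> &worklist, stmt_node *stmt,
	       relevance r, bool live_p)
{
  relevance save_relevant = stmt->relevant;
  bool save_live_p = stmt->live;

  stmt->live |= live_p;
  if (r > stmt->relevant)
    stmt->relevant = r;

  /* Already marked at least this relevant/live: nothing to re-process.  */
  if (stmt->relevant == save_relevant && stmt->live == save_live_p)
    return;

  worklist.push_back (stmt);
}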
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
261 tree op;
262 ssa_op_iter iter;
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
347 *live_p = true;
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
361 return (*live_p || *relevant);
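/* For instance, in

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i] + 1;	<-- has a vdef, so vect_used_in_scope
	 last = b[i];		<-- only used after the loop, so live
       }
     ... = last;

   the store is relevant because it alters memory, while the definition of
   'last' is live-only and, since it is not invariant, ends up as
   vect_used_only_live.  (Illustrative example, not taken from a testcase.)  */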
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
373 tree operand;
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
381 /* STMT has a data_ref. FORNOW this means that it's of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
413 return false;
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
422 if (operand == use)
423 return true;
425 return false;
430 Function process_use.
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
474 if (!dstmt_vinfo)
475 return opt_result::success ();
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
654 use_operand_p use_p;
655 ssa_op_iter iter;
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
706 default:
707 break;
710 if (is_pattern_stmt_p (stmt_vinfo))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
734 for (; i < gimple_num_ops (assign); i++)
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
749 for (i = 0; i < gimple_call_num_args (call); i++)
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
781 if (fatal)
782 *fatal = false;
783 return res;
786 } /* while worklist */
788 return opt_result::success ();
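/* A miniature, self-contained model of the two phases above: seed the
   worklist with statements that are relevant by themselves (stores,
   values live after the loop), then repeatedly pop a statement and
   propagate relevance to the definitions of its operands.  stmt_node and
   relevant_by_itself are simplified stand-ins for stmt_vec_info and
   vect_stmt_relevant_p; the real pass propagates an enum, not a bool,
   and applies the def-type checks shown above.  */

#include <vector>

struct stmt_node
{
  bool relevant_by_itself = false;
  bool relevant = false;
  std::vector<stmt_node *> defs;	/* defining stmts of the operands used */
};

static void
mark_stmts_to_be_vectorized (std::vector<stmt_node *> &stmts)
{
  std::vector<stmt_node *> worklist;

  /* 1. Init worklist.  */
  for (stmt_node *s : stmts)
    if (s->relevant_by_itself)
      {
	s->relevant = true;
	worklist.push_back (s);
      }

  /* 2. Process worklist: relevance flows from uses to definitions.  */
  while (!worklist.empty ())
    {
      stmt_node *s = worklist.back ();
      worklist.pop_back ();
      for (stmt_node *def : s->defs)
	if (!def->relevant)
	  {
	    def->relevant = true;
	    worklist.push_back (def);
	  }
    }
}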
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
806 int inside_cost = 0, prologue_cost = 0;
808 gcc_assert (cost_vec != NULL);
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand in to a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. */
841 static void
842 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
843 enum vect_def_type *dt,
844 unsigned int ncopies, int pwr,
845 stmt_vector_for_cost *cost_vec)
847 int i;
848 int inside_cost = 0, prologue_cost = 0;
850 for (i = 0; i < pwr + 1; i++)
852 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
853 stmt_info, 0, vect_body);
854 ncopies *= 2;
857 /* FORNOW: Assuming maximum 2 args per stmt. */
858 for (i = 0; i < 2; i++)
859 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
861 stmt_info, 0, vect_prologue);
863 if (dump_enabled_p ())
864 dump_printf_loc (MSG_NOTE, vect_location,
865 "vect_model_promotion_demotion_cost: inside_cost = %d, "
866 "prologue_cost = %d .\n", inside_cost, prologue_cost);
869 /* Returns true if the current function returns DECL. */
871 static bool
872 cfun_returns (tree decl)
874 edge_iterator ei;
875 edge e;
876 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
878 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
879 if (!ret)
880 continue;
881 if (gimple_return_retval (ret) == decl)
882 return true;
883 /* We often end up with an aggregate copy to the result decl,
884 handle that case as well. First skip intermediate clobbers
885 though. */
886 gimple *def = ret;
887 do
889 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
891 while (gimple_clobber_p (def));
892 if (is_a <gassign *> (def)
893 && gimple_assign_lhs (def) == gimple_return_retval (ret)
894 && gimple_assign_rhs1 (def) == decl)
895 return true;
897 return false;
900 /* Function vect_model_store_cost
902 Models cost for stores. In the case of grouped accesses, one access
903 has the overhead of the grouped access attributed to it. */
905 static void
906 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
907 vect_memory_access_type memory_access_type,
908 vec_load_store_type vls_type, slp_tree slp_node,
909 stmt_vector_for_cost *cost_vec)
911 unsigned int inside_cost = 0, prologue_cost = 0;
912 stmt_vec_info first_stmt_info = stmt_info;
913 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
915 /* ??? Somehow we need to fix this at the callers. */
916 if (slp_node)
917 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
919 if (vls_type == VLS_STORE_INVARIANT)
921 if (!slp_node)
922 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
923 stmt_info, 0, vect_prologue);
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node && grouped_access_p)
929 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p = (first_stmt_info == stmt_info);
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (first_stmt_p
941 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
943 /* Uses high and low interleave or shuffle operations for each
944 needed permute. */
945 int group_size = DR_GROUP_SIZE (first_stmt_info);
946 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
948 stmt_info, 0, vect_body);
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: strided group_size = %d .\n",
953 group_size);
956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957 /* Costs of the stores. */
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_GATHER_SCATTER)
961 /* N scalar stores plus extracting the elements. */
962 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
963 inside_cost += record_stmt_cost (cost_vec,
964 ncopies * assumed_nunits,
965 scalar_store, stmt_info, 0, vect_body);
967 else
968 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
970 if (memory_access_type == VMAT_ELEMENTWISE
971 || memory_access_type == VMAT_STRIDED_SLP)
973 /* N scalar stores plus extracting the elements. */
974 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
975 inside_cost += record_stmt_cost (cost_vec,
976 ncopies * assumed_nunits,
977 vec_to_scalar, stmt_info, 0, vect_body);
980 /* When vectorizing a store into the function result assign
981 a penalty if the function returns in a multi-register location.
982 In this case we assume we'll end up with having to spill the
983 vector result and do piecewise loads as a conservative estimate. */
984 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
985 if (base
986 && (TREE_CODE (base) == RESULT_DECL
987 || (DECL_P (base) && cfun_returns (base)))
988 && !aggregate_value_p (base, cfun->decl))
990 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
991 /* ??? Handle PARALLEL in some way. */
992 if (REG_P (reg))
994 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
995 /* Assume that a single reg-reg move is possible and cheap,
996 do not account for vector to gp register move cost. */
997 if (nregs > 1)
999 /* Spill. */
1000 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1001 vector_store,
1002 stmt_info, 0, vect_epilogue);
1003 /* Loads. */
1004 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1005 scalar_load,
1006 stmt_info, 0, vect_epilogue);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: inside_cost = %d, "
1014 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1018 /* Calculate cost of DR's memory access. */
1019 void
1020 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1021 unsigned int *inside_cost,
1022 stmt_vector_for_cost *body_cost_vec)
1024 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1025 int alignment_support_scheme
1026 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1028 switch (alignment_support_scheme)
1030 case dr_aligned:
1032 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1033 vector_store, stmt_info, 0,
1034 vect_body);
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE, vect_location,
1038 "vect_model_store_cost: aligned.\n");
1039 break;
1042 case dr_unaligned_supported:
1044 /* Here, we assign an additional cost for the unaligned store. */
1045 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1046 unaligned_store, stmt_info,
1047 DR_MISALIGNMENT (dr_info),
1048 vect_body);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "vect_model_store_cost: unaligned supported by "
1052 "hardware.\n");
1053 break;
1056 case dr_unaligned_unsupported:
1058 *inside_cost = VECT_MAX_COST;
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1062 "vect_model_store_cost: unsupported access.\n");
1063 break;
1066 default:
1067 gcc_unreachable ();
1072 /* Function vect_model_load_cost
1074 Models cost for loads. In the case of grouped accesses, one access has
1075 the overhead of the grouped access attributed to it. Since unaligned
1076 accesses are supported for loads, we also account for the costs of the
1077 access scheme chosen. */
1079 static void
1080 vect_model_load_cost (vec_info *vinfo,
1081 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1082 vect_memory_access_type memory_access_type,
1083 slp_tree slp_node,
1084 stmt_vector_for_cost *cost_vec)
1086 unsigned int inside_cost = 0, prologue_cost = 0;
1087 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1089 gcc_assert (cost_vec);
1091 /* ??? Somehow we need to fix this at the callers. */
1092 if (slp_node)
1093 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1095 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1097 /* If the load is permuted then the alignment is determined by
1098 the first group element not by the first scalar stmt DR. */
1099 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1100 /* Record the cost for the permutation. */
1101 unsigned n_perms;
1102 unsigned assumed_nunits
1103 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1104 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1105 vf, true, &n_perms);
1106 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1107 first_stmt_info, 0, vect_body);
1108 /* And adjust the number of loads performed. This handles
1109 redundancies as well as loads that are later dead. */
1110 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1111 bitmap_clear (perm);
1112 for (unsigned i = 0;
1113 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1114 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1115 ncopies = 0;
1116 bool load_seen = false;
1117 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1119 if (i % assumed_nunits == 0)
1121 if (load_seen)
1122 ncopies++;
1123 load_seen = false;
1125 if (bitmap_bit_p (perm, i))
1126 load_seen = true;
1128 if (load_seen)
1129 ncopies++;
1130 gcc_assert (ncopies
1131 <= (DR_GROUP_SIZE (first_stmt_info)
1132 - DR_GROUP_GAP (first_stmt_info)
1133 + assumed_nunits - 1) / assumed_nunits);
1136 /* Grouped loads read all elements in the group at once,
1137 so we want the DR for the first statement. */
1138 stmt_vec_info first_stmt_info = stmt_info;
1139 if (!slp_node && grouped_access_p)
1140 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1142 /* True if we should include any once-per-group costs as well as
1143 the cost of the statement itself. For SLP we only get called
1144 once per group anyhow. */
1145 bool first_stmt_p = (first_stmt_info == stmt_info);
1147 /* We assume that the cost of a single load-lanes instruction is
1148 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1149 access is instead being provided by a load-and-permute operation,
1150 include the cost of the permutes. */
1151 if (first_stmt_p
1152 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1154 /* Uses even and odd extract operations or shuffle operations
1155 for each needed permute. */
1156 int group_size = DR_GROUP_SIZE (first_stmt_info);
1157 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1158 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1159 stmt_info, 0, vect_body);
1161 if (dump_enabled_p ())
1162 dump_printf_loc (MSG_NOTE, vect_location,
1163 "vect_model_load_cost: strided group_size = %d .\n",
1164 group_size);
1167 /* The loads themselves. */
1168 if (memory_access_type == VMAT_ELEMENTWISE
1169 || memory_access_type == VMAT_GATHER_SCATTER)
1171 /* N scalar loads plus gathering them into a vector. */
1172 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1173 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1174 inside_cost += record_stmt_cost (cost_vec,
1175 ncopies * assumed_nunits,
1176 scalar_load, stmt_info, 0, vect_body);
1178 else
1179 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1180 &inside_cost, &prologue_cost,
1181 cost_vec, cost_vec, true);
1182 if (memory_access_type == VMAT_ELEMENTWISE
1183 || memory_access_type == VMAT_STRIDED_SLP)
1184 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1185 stmt_info, 0, vect_body);
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: inside_cost = %d, "
1190 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1194 /* Calculate cost of DR's memory access. */
1195 void
1196 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1197 bool add_realign_cost, unsigned int *inside_cost,
1198 unsigned int *prologue_cost,
1199 stmt_vector_for_cost *prologue_cost_vec,
1200 stmt_vector_for_cost *body_cost_vec,
1201 bool record_prologue_costs)
1203 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1204 int alignment_support_scheme
1205 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1207 switch (alignment_support_scheme)
1209 case dr_aligned:
1211 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1212 stmt_info, 0, vect_body);
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_NOTE, vect_location,
1216 "vect_model_load_cost: aligned.\n");
1218 break;
1220 case dr_unaligned_supported:
1222 /* Here, we assign an additional cost for the unaligned load. */
1223 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1224 unaligned_load, stmt_info,
1225 DR_MISALIGNMENT (dr_info),
1226 vect_body);
1228 if (dump_enabled_p ())
1229 dump_printf_loc (MSG_NOTE, vect_location,
1230 "vect_model_load_cost: unaligned supported by "
1231 "hardware.\n");
1233 break;
1235 case dr_explicit_realign:
1237 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1238 vector_load, stmt_info, 0, vect_body);
1239 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1240 vec_perm, stmt_info, 0, vect_body);
1242 /* FIXME: If the misalignment remains fixed across the iterations of
1243 the containing loop, the following cost should be added to the
1244 prologue costs. */
1245 if (targetm.vectorize.builtin_mask_for_load)
1246 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1247 stmt_info, 0, vect_body);
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: explicit realign\n");
1253 break;
1255 case dr_explicit_realign_optimized:
1257 if (dump_enabled_p ())
1258 dump_printf_loc (MSG_NOTE, vect_location,
1259 "vect_model_load_cost: unaligned software "
1260 "pipelined.\n");
1262 /* Unaligned software pipeline has a load of an address, an initial
1263 load, and possibly a mask operation to "prime" the loop. However,
1264 if this is an access in a group of loads, which provide grouped
1265 access, then the above cost should only be considered for one
1266 access in the group. Inside the loop, there is a load op
1267 and a realignment op. */
1269 if (add_realign_cost && record_prologue_costs)
1271 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1272 vector_stmt, stmt_info,
1273 0, vect_prologue);
1274 if (targetm.vectorize.builtin_mask_for_load)
1275 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1276 vector_stmt, stmt_info,
1277 0, vect_prologue);
1280 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1281 stmt_info, 0, vect_body);
1282 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1283 stmt_info, 0, vect_body);
1285 if (dump_enabled_p ())
1286 dump_printf_loc (MSG_NOTE, vect_location,
1287 "vect_model_load_cost: explicit realign optimized"
1288 "\n");
1290 break;
1293 case dr_unaligned_unsupported:
1295 *inside_cost = VECT_MAX_COST;
1297 if (dump_enabled_p ())
1298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1299 "vect_model_load_cost: unsupported access.\n");
1300 break;
1303 default:
1304 gcc_unreachable ();
1308 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1309 the loop preheader for the vectorized stmt STMT_VINFO. */
1311 static void
1312 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1313 gimple_stmt_iterator *gsi)
1315 if (gsi)
1316 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1317 else
1318 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "created new init_stmt: %G", new_stmt);
1325 /* Function vect_init_vector.
1327 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1328 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1329 vector type a vector with all elements equal to VAL is created first.
1330 Place the initialization at GSI if it is not NULL. Otherwise, place the
1331 initialization at the loop preheader.
1332 Return the DEF of INIT_STMT.
1333 It will be used in the vectorization of STMT_INFO. */
1335 tree
1336 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1337 gimple_stmt_iterator *gsi)
1339 gimple *init_stmt;
1340 tree new_temp;
1342 /* We abuse this function to push something to an SSA name with initial 'val'. */
1343 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1345 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1346 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1348 /* Scalar boolean value should be transformed into
1349 all zeros or all ones value before building a vector. */
1350 if (VECTOR_BOOLEAN_TYPE_P (type))
1352 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1353 tree false_val = build_zero_cst (TREE_TYPE (type));
1355 if (CONSTANT_CLASS_P (val))
1356 val = integer_zerop (val) ? false_val : true_val;
1357 else
1359 new_temp = make_ssa_name (TREE_TYPE (type));
1360 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1361 val, true_val, false_val);
1362 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1363 val = new_temp;
1366 else
1368 gimple_seq stmts = NULL;
1369 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1370 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1371 TREE_TYPE (type), val);
1372 else
1373 /* ??? Condition vectorization expects us to do
1374 promotion of invariant/external defs. */
1375 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1376 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1377 !gsi_end_p (gsi2); )
1379 init_stmt = gsi_stmt (gsi2);
1380 gsi_remove (&gsi2, false);
1381 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1385 val = build_vector_from_val (type, val);
1388 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1389 init_stmt = gimple_build_assign (new_temp, val);
1390 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391 return new_temp;
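/* A stand-alone illustration of the boolean canonicalization above:
   before a scalar condition is broadcast into a vector-boolean, it is
   turned into an all-ones / all-zeros value of the element type.  The
   int8_t element type and the 16-lane vector are arbitrary illustrative
   choices, not what the vectorizer necessarily picks.  */

#include <array>
#include <cstdint>

static std::array<int8_t, 16>
broadcast_bool (bool val)
{
  int8_t elem = val ? (int8_t) -1 : (int8_t) 0;	/* all ones or all zeros */
  std::array<int8_t, 16> vec;
  vec.fill (elem);				/* build_vector_from_val analogue */
  return vec;
}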
1395 /* Function vect_get_vec_defs_for_operand.
1397 OP is an operand in STMT_VINFO. This function returns a vector of
1398 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1400 In the case that OP is an SSA_NAME which is defined in the loop, then
1401 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1403 In case OP is an invariant or constant, a new stmt that creates a vector def
1404 needs to be introduced. VECTYPE may be used to specify a required type for
1405 vector invariant. */
1407 void
1408 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1409 unsigned ncopies,
1410 tree op, vec<tree> *vec_oprnds, tree vectype)
1412 gimple *def_stmt;
1413 enum vect_def_type dt;
1414 bool is_simple_use;
1415 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1417 if (dump_enabled_p ())
1418 dump_printf_loc (MSG_NOTE, vect_location,
1419 "vect_get_vec_defs_for_operand: %T\n", op);
1421 stmt_vec_info def_stmt_info;
1422 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1423 &def_stmt_info, &def_stmt);
1424 gcc_assert (is_simple_use);
1425 if (def_stmt && dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1428 vec_oprnds->create (ncopies);
1429 if (dt == vect_constant_def || dt == vect_external_def)
1431 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1432 tree vector_type;
1434 if (vectype)
1435 vector_type = vectype;
1436 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1437 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1438 vector_type = truth_type_for (stmt_vectype);
1439 else
1440 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1442 gcc_assert (vector_type);
1443 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1444 while (ncopies--)
1445 vec_oprnds->quick_push (vop);
1447 else
1449 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1450 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1451 for (unsigned i = 0; i < ncopies; ++i)
1452 vec_oprnds->quick_push (gimple_get_lhs
1453 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1458 /* Get vectorized definitions for OP0 and OP1. */
1460 void
1461 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1462 unsigned ncopies,
1463 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1464 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1465 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1466 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1468 if (slp_node)
1470 if (op0)
1471 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1472 if (op1)
1473 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1474 if (op2)
1475 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1476 if (op3)
1477 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1479 else
1481 if (op0)
1482 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1483 op0, vec_oprnds0, vectype0);
1484 if (op1)
1485 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1486 op1, vec_oprnds1, vectype1);
1487 if (op2)
1488 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1489 op2, vec_oprnds2, vectype2);
1490 if (op3)
1491 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1492 op3, vec_oprnds3, vectype3);
1496 void
1497 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1498 unsigned ncopies,
1499 tree op0, vec<tree> *vec_oprnds0,
1500 tree op1, vec<tree> *vec_oprnds1,
1501 tree op2, vec<tree> *vec_oprnds2,
1502 tree op3, vec<tree> *vec_oprnds3)
1504 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1505 op0, vec_oprnds0, NULL_TREE,
1506 op1, vec_oprnds1, NULL_TREE,
1507 op2, vec_oprnds2, NULL_TREE,
1508 op3, vec_oprnds3, NULL_TREE);
1511 /* Helper function called by vect_finish_replace_stmt and
1512 vect_finish_stmt_generation. Set the location of the new
1513 statement and create and return a stmt_vec_info for it. */
1515 static void
1516 vect_finish_stmt_generation_1 (vec_info *,
1517 stmt_vec_info stmt_info, gimple *vec_stmt)
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1522 if (stmt_info)
1524 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1526 /* While EH edges will generally prevent vectorization, stmt might
1527 e.g. be in a must-not-throw region. Ensure newly created stmts
1528 that could throw are part of the same region. */
1529 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1530 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1531 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1533 else
1534 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1537 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1538 which sets the same scalar result as STMT_INFO did. Create and return a
1539 stmt_vec_info for VEC_STMT. */
1541 void
1542 vect_finish_replace_stmt (vec_info *vinfo,
1543 stmt_vec_info stmt_info, gimple *vec_stmt)
1545 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1546 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1548 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1549 gsi_replace (&gsi, vec_stmt, true);
1551 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1554 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1555 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1557 void
1558 vect_finish_stmt_generation (vec_info *vinfo,
1559 stmt_vec_info stmt_info, gimple *vec_stmt,
1560 gimple_stmt_iterator *gsi)
1562 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1564 if (!gsi_end_p (*gsi)
1565 && gimple_has_mem_ops (vec_stmt))
1567 gimple *at_stmt = gsi_stmt (*gsi);
1568 tree vuse = gimple_vuse (at_stmt);
1569 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1571 tree vdef = gimple_vdef (at_stmt);
1572 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1573 gimple_set_modified (vec_stmt, true);
1574 /* If we have an SSA vuse and insert a store, update virtual
1575 SSA form to avoid triggering the renamer. Do so only
1576 if we can easily see all uses - which is what almost always
1577 happens with the way vectorized stmts are inserted. */
1578 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1579 && ((is_gimple_assign (vec_stmt)
1580 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1581 || (is_gimple_call (vec_stmt)
1582 && !(gimple_call_flags (vec_stmt)
1583 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1585 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1586 gimple_set_vdef (vec_stmt, new_vdef);
1587 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1591 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1592 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1595 /* We want to vectorize a call to combined function CFN with function
1596 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1597 as the types of all inputs. Check whether this is possible using
1598 an internal function, returning its code if so or IFN_LAST if not. */
1600 static internal_fn
1601 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1602 tree vectype_out, tree vectype_in)
1604 internal_fn ifn;
1605 if (internal_fn_p (cfn))
1606 ifn = as_internal_fn (cfn);
1607 else
1608 ifn = associated_internal_fn (fndecl);
1609 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1611 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1612 if (info.vectorizable)
1614 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1615 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1616 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1617 OPTIMIZE_FOR_SPEED))
1618 return ifn;
1621 return IFN_LAST;
1625 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1626 gimple_stmt_iterator *);
1628 /* Check whether a load or store statement in the loop described by
1629 LOOP_VINFO is possible in a loop using partial vectors. This is
1630 testing whether the vectorizer pass has the appropriate support,
1631 as well as whether the target does.
1633 VLS_TYPE says whether the statement is a load or store and VECTYPE
1634 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1635 says how the load or store is going to be implemented and GROUP_SIZE
1636 is the number of load or store statements in the containing group.
1637 If the access is a gather load or scatter store, GS_INFO describes
1638 its arguments. If the load or store is conditional, SCALAR_MASK is the
1639 condition under which it occurs.
1641 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1642 vectors is not supported, otherwise record the required rgroup control
1643 types. */
1645 static void
1646 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1647 vec_load_store_type vls_type,
1648 int group_size,
1649 vect_memory_access_type
1650 memory_access_type,
1651 gather_scatter_info *gs_info,
1652 tree scalar_mask)
1654 /* Invariant loads need no special support. */
1655 if (memory_access_type == VMAT_INVARIANT)
1656 return;
1658 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1659 machine_mode vecmode = TYPE_MODE (vectype);
1660 bool is_load = (vls_type == VLS_LOAD);
1661 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1663 if (is_load
1664 ? !vect_load_lanes_supported (vectype, group_size, true)
1665 : !vect_store_lanes_supported (vectype, group_size, true))
1667 if (dump_enabled_p ())
1668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1669 "can't operate on partial vectors because"
1670 " the target doesn't have an appropriate"
1671 " load/store-lanes instruction.\n");
1672 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1673 return;
1675 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1676 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1677 return;
1680 if (memory_access_type == VMAT_GATHER_SCATTER)
1682 internal_fn ifn = (is_load
1683 ? IFN_MASK_GATHER_LOAD
1684 : IFN_MASK_SCATTER_STORE);
1685 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1686 gs_info->memory_type,
1687 gs_info->offset_vectype,
1688 gs_info->scale))
1690 if (dump_enabled_p ())
1691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1692 "can't operate on partial vectors because"
1693 " the target doesn't have an appropriate"
1694 " gather load or scatter store instruction.\n");
1695 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1696 return;
1698 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1699 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1700 return;
1703 if (memory_access_type != VMAT_CONTIGUOUS
1704 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1706 /* Element X of the data must come from iteration i * VF + X of the
1707 scalar loop. We need more work to support other mappings. */
1708 if (dump_enabled_p ())
1709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1710 "can't operate on partial vectors because an"
1711 " access isn't contiguous.\n");
1712 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1713 return;
1716 if (!VECTOR_MODE_P (vecmode))
1718 if (dump_enabled_p ())
1719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1720 "can't operate on partial vectors when emulating"
1721 " vector operations.\n");
1722 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1723 return;
1726 /* We might load more scalars than we need for permuting SLP loads.
1727 We checked in get_group_load_store_type that the extra elements
1728 don't leak into a new vector. */
1729 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1731 unsigned int nvectors;
1732 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1733 return nvectors;
1734 gcc_unreachable ();
1737 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1738 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1739 machine_mode mask_mode;
1740 bool using_partial_vectors_p = false;
1741 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1742 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1744 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1745 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1746 using_partial_vectors_p = true;
1749 machine_mode vmode;
1750 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1752 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1753 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1754 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1755 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1756 using_partial_vectors_p = true;
1759 if (!using_partial_vectors_p)
1761 if (dump_enabled_p ())
1762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1763 "can't operate on partial vectors because the"
1764 " target doesn't have the appropriate partial"
1765 " vectorization load or store.\n");
1766 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
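/* A worked example with hypothetical numbers: for a single-element group
   (GROUP_SIZE == 1), a vectorization factor of 16 and an 8-element vector
   type, GROUP_SIZE * VF == 16 and NUNITS == 8, so get_valid_nvectors
   rounds 16 / 8 away from zero to 2 and we record a requirement for two
   loop masks (or two loop lengths).  For the length-based path, FACTOR is
   1 when the len load/store mode matches VECMODE and GET_MODE_UNIT_SIZE
   (vecmode) otherwise, which suggests the length is then counted in bytes
   rather than in elements.  */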
1770 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1771 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1772 that needs to be applied to all loads and stores in a vectorized loop.
1773 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1775 MASK_TYPE is the type of both masks. If new statements are needed,
1776 insert them before GSI. */
1778 static tree
1779 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1780 gimple_stmt_iterator *gsi)
1782 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1783 if (!loop_mask)
1784 return vec_mask;
1786 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1787 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1788 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1789 vec_mask, loop_mask);
1790 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1791 return and_res;
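/* A minimal scalar sketch of the rule implemented above, assuming plain
   byte masks instead of vector trees; the helper name and parameters are
   purely illustrative and nothing in the vectorizer uses them.  */

static inline void
scalar_mask_and_sketch (const unsigned char *vec_mask,
			const unsigned char *loop_mask,
			unsigned char *res, unsigned int n)
{
  /* Mirror prepare_load_store_mask: use VEC_MASK unchanged when there is
     no loop mask, otherwise AND the two masks elementwise.  */
  for (unsigned int i = 0; i < n; ++i)
    res[i] = loop_mask ? (vec_mask[i] & loop_mask[i]) : vec_mask[i];
}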
1794 /* Determine whether we can use a gather load or scatter store to vectorize
1795 strided load or store STMT_INFO by truncating the current offset to a
1796 smaller width. We need to be able to construct an offset vector:
1798 { 0, X, X*2, X*3, ... }
1800 without loss of precision, where X is STMT_INFO's DR_STEP.
1802 Return true if this is possible, describing the gather load or scatter
1803 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1805 static bool
1806 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1807 loop_vec_info loop_vinfo, bool masked_p,
1808 gather_scatter_info *gs_info)
1810 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1811 data_reference *dr = dr_info->dr;
1812 tree step = DR_STEP (dr);
1813 if (TREE_CODE (step) != INTEGER_CST)
1815 /* ??? Perhaps we could use range information here? */
1816 if (dump_enabled_p ())
1817 dump_printf_loc (MSG_NOTE, vect_location,
1818 "cannot truncate variable step.\n");
1819 return false;
1822 /* Get the number of bits in an element. */
1823 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1824 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1825 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1827 /* Set COUNT to the upper limit on the number of elements - 1.
1828 Start with the maximum vectorization factor. */
1829 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1831 /* Try lowering COUNT to the number of scalar latch iterations. */
1832 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1833 widest_int max_iters;
1834 if (max_loop_iterations (loop, &max_iters)
1835 && max_iters < count)
1836 count = max_iters.to_shwi ();
1838 /* Try scales of 1 and the element size. */
1839 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1840 wi::overflow_type overflow = wi::OVF_NONE;
1841 for (int i = 0; i < 2; ++i)
1843 int scale = scales[i];
1844 widest_int factor;
1845 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1846 continue;
1848 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1849 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1850 if (overflow)
1851 continue;
1852 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1853 unsigned int min_offset_bits = wi::min_precision (range, sign);
1855 /* Find the narrowest viable offset type. */
1856 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1857 tree offset_type = build_nonstandard_integer_type (offset_bits,
1858 sign == UNSIGNED);
1860 /* See whether the target supports the operation with an offset
1861 no narrower than OFFSET_TYPE. */
1862 tree memory_type = TREE_TYPE (DR_REF (dr));
1863 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1864 vectype, memory_type, offset_type, scale,
1865 &gs_info->ifn, &gs_info->offset_vectype))
1866 continue;
1868 gs_info->decl = NULL_TREE;
1869 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1870 but we don't need to store that here. */
1871 gs_info->base = NULL_TREE;
1872 gs_info->element_type = TREE_TYPE (vectype);
1873 gs_info->offset = fold_convert (offset_type, step);
1874 gs_info->offset_dt = vect_constant_def;
1875 gs_info->scale = scale;
1876 gs_info->memory_type = memory_type;
1877 return true;
1880 if (overflow && dump_enabled_p ())
1881 dump_printf_loc (MSG_NOTE, vect_location,
1882 "truncating gather/scatter offset to %d bits"
1883 " might change its value.\n", element_bits);
1885 return false;
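/* A worked example with hypothetical numbers: for 4-byte elements with
   DR_STEP == 4 and COUNT capped at 1000 latch iterations, SCALE == 1
   gives FACTOR == 4 and RANGE == 4000 (12 bits), while SCALE == 4 gives
   FACTOR == 1 and RANGE == 1000 (10 bits); both round up to a 16-bit
   unsigned offset type, and we use whichever (offset type, scale)
   combination the target's gather/scatter functions actually support.  */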
1888 /* Return true if we can use gather/scatter internal functions to
1889 vectorize STMT_INFO, which is a grouped or strided load or store.
1890 MASKED_P is true if load or store is conditional. When returning
1891 true, fill in GS_INFO with the information required to perform the
1892 operation. */
1894 static bool
1895 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1896 loop_vec_info loop_vinfo, bool masked_p,
1897 gather_scatter_info *gs_info)
1899 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1900 || gs_info->decl)
1901 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1902 masked_p, gs_info);
1904 tree old_offset_type = TREE_TYPE (gs_info->offset);
1905 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1907 gcc_assert (TYPE_PRECISION (new_offset_type)
1908 >= TYPE_PRECISION (old_offset_type));
1909 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1911 if (dump_enabled_p ())
1912 dump_printf_loc (MSG_NOTE, vect_location,
1913 "using gather/scatter for strided/grouped access,"
1914 " scale = %d\n", gs_info->scale);
1916 return true;
1919 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1920 elements with a known constant step. Return -1 if that step
1921 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1923 static int
1924 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1926 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1927 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1928 size_zero_node);
1931 /* If the target supports a permute mask that reverses the elements in
1932 a vector of type VECTYPE, return that mask, otherwise return null. */
1934 static tree
1935 perm_mask_for_reverse (tree vectype)
1937 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1939 /* The encoding has a single stepped pattern. */
1940 vec_perm_builder sel (nunits, 1, 3);
1941 for (int i = 0; i < 3; ++i)
1942 sel.quick_push (nunits - 1 - i);
1944 vec_perm_indices indices (sel, 1, nunits);
1945 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1946 return NULL_TREE;
1947 return vect_gen_perm_mask_checked (vectype, indices);
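/* For example, for V4SI (NUNITS == 4) the selector is { 3, 2, 1, 0 }:
   only the first three elements { 3, 2, 1 } are pushed explicitly above
   and the rest of the series is implied by the single stepped pattern.  */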
1950 /* A subroutine of get_load_store_type, with a subset of the same
1951 arguments. Handle the case where STMT_INFO is a load or store that
1952 accesses consecutive elements with a negative step. */
1954 static vect_memory_access_type
1955 get_negative_load_store_type (vec_info *vinfo,
1956 stmt_vec_info stmt_info, tree vectype,
1957 vec_load_store_type vls_type,
1958 unsigned int ncopies)
1960 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1961 dr_alignment_support alignment_support_scheme;
1963 if (ncopies > 1)
1965 if (dump_enabled_p ())
1966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1967 "multiple types with negative step.\n");
1968 return VMAT_ELEMENTWISE;
1971 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1972 dr_info, false);
1973 if (alignment_support_scheme != dr_aligned
1974 && alignment_support_scheme != dr_unaligned_supported)
1976 if (dump_enabled_p ())
1977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1978 "negative step but alignment required.\n");
1979 return VMAT_ELEMENTWISE;
1982 if (vls_type == VLS_STORE_INVARIANT)
1984 if (dump_enabled_p ())
1985 dump_printf_loc (MSG_NOTE, vect_location,
1986 "negative step with invariant source;"
1987 " no permute needed.\n");
1988 return VMAT_CONTIGUOUS_DOWN;
1991 if (!perm_mask_for_reverse (vectype))
1993 if (dump_enabled_p ())
1994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1995 "negative step and reversing not supported.\n");
1996 return VMAT_ELEMENTWISE;
1999 return VMAT_CONTIGUOUS_REVERSE;
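/* A hypothetical example: a single-copy V4SI load with DR_STEP == -4
   reads four consecutive ints ending at the current address.  If the
   access is sufficiently aligned (or misalignment is supported) and the
   target can reverse a V4SI (see perm_mask_for_reverse), we return
   VMAT_CONTIGUOUS_REVERSE and load the block at the lowest address
   followed by an element reversal; otherwise we fall back to
   VMAT_ELEMENTWISE.  */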
2002 /* STMT_INFO is either a masked or unconditional store. Return the value
2003 being stored. */
2005 tree
2006 vect_get_store_rhs (stmt_vec_info stmt_info)
2008 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2010 gcc_assert (gimple_assign_single_p (assign));
2011 return gimple_assign_rhs1 (assign);
2013 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2015 internal_fn ifn = gimple_call_internal_fn (call);
2016 int index = internal_fn_stored_value_index (ifn);
2017 gcc_assert (index >= 0);
2018 return gimple_call_arg (call, index);
2020 gcc_unreachable ();
2023 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2025    This function returns a vector type which can be composed from NELTS pieces,
2026    whose type is recorded in PTYPE.  VTYPE must be a vector type with the
2027    same vector size as the returned vector.  It first checks whether the
2028    target supports a piece-sized vector mode for the construction; if not,
2029    it then checks a piece-sized scalar mode.  It returns NULL_TREE if no
2030    usable composition can be found.
2032 For example, for (vtype=V16QI, nelts=4), we can probably get:
2033 - V16QI with PTYPE V4QI.
2034 - V4SI with PTYPE SI.
2035 - NULL_TREE. */
2037 static tree
2038 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2040 gcc_assert (VECTOR_TYPE_P (vtype));
2041 gcc_assert (known_gt (nelts, 0U));
2043 machine_mode vmode = TYPE_MODE (vtype);
2044 if (!VECTOR_MODE_P (vmode))
2045 return NULL_TREE;
2047 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2048 unsigned int pbsize;
2049 if (constant_multiple_p (vbsize, nelts, &pbsize))
2051 /* First check if vec_init optab supports construction from
2052 vector pieces directly. */
2053 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2054 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2055 machine_mode rmode;
2056 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2057 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2058 != CODE_FOR_nothing))
2060 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2061 return vtype;
2064      /* Otherwise check whether an integer type of the same piece size exists
2065         and whether the vec_init optab supports construction from it directly.  */
2066 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2067 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2068 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2069 != CODE_FOR_nothing))
2071 *ptype = build_nonstandard_integer_type (pbsize, 1);
2072 return build_vector_type (*ptype, nelts);
2076 return NULL_TREE;
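/* As a further hypothetical example, get_group_load_store_type below
   calls this function with NELTS == 2 to see whether a vector can be
   composed from two halves; for (vtype=V8HI, nelts=2) we can probably get:
     - V8HI with PTYPE V4HI.
     - V2DI with PTYPE DI.
     - NULL_TREE.  */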
2079 /* A subroutine of get_load_store_type, with a subset of the same
2080 arguments. Handle the case where STMT_INFO is part of a grouped load
2081 or store.
2083 For stores, the statements in the group are all consecutive
2084 and there is no gap at the end. For loads, the statements in the
2085 group might not be consecutive; there can be gaps between statements
2086 as well as at the end. */
2088 static bool
2089 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2090 tree vectype, slp_tree slp_node,
2091 bool masked_p, vec_load_store_type vls_type,
2092 vect_memory_access_type *memory_access_type,
2093 dr_alignment_support *alignment_support_scheme,
2094 gather_scatter_info *gs_info)
2096 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2097 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2098 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2099 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2100 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2101 bool single_element_p = (stmt_info == first_stmt_info
2102 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2103 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2104 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2106 /* True if the vectorized statements would access beyond the last
2107 statement in the group. */
2108 bool overrun_p = false;
2110 /* True if we can cope with such overrun by peeling for gaps, so that
2111 there is at least one final scalar iteration after the vector loop. */
2112 bool can_overrun_p = (!masked_p
2113 && vls_type == VLS_LOAD
2114 && loop_vinfo
2115 && !loop->inner);
2117 /* There can only be a gap at the end of the group if the stride is
2118 known at compile time. */
2119 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2121 /* Stores can't yet have gaps. */
2122 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2124 if (slp_node)
2126 /* For SLP vectorization we directly vectorize a subchain
2127 without permutation. */
2128 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2129 first_dr_info
2130 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2131 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2133 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2134 separated by the stride, until we have a complete vector.
2135 Fall back to scalar accesses if that isn't possible. */
2136 if (multiple_p (nunits, group_size))
2137 *memory_access_type = VMAT_STRIDED_SLP;
2138 else
2139 *memory_access_type = VMAT_ELEMENTWISE;
2141 else
2143 overrun_p = loop_vinfo && gap != 0;
2144 if (overrun_p && vls_type != VLS_LOAD)
2146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2147 "Grouped store with gaps requires"
2148 " non-consecutive accesses\n");
2149 return false;
2151 /* An overrun is fine if the trailing elements are smaller
2152 than the alignment boundary B. Every vector access will
2153 be a multiple of B and so we are guaranteed to access a
2154 non-gap element in the same B-sized block. */
2155 if (overrun_p
2156 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2157 / vect_get_scalar_dr_size (first_dr_info)))
2158 overrun_p = false;
2160 /* If the gap splits the vector in half and the target
2161 can do half-vector operations avoid the epilogue peeling
2162 by simply loading half of the vector only. Usually
2163 the construction with an upper zero half will be elided. */
2164 dr_alignment_support alignment_support_scheme;
2165 tree half_vtype;
2166 if (overrun_p
2167 && !masked_p
2168 && (((alignment_support_scheme
2169 = vect_supportable_dr_alignment (vinfo,
2170 first_dr_info, false)))
2171 == dr_aligned
2172 || alignment_support_scheme == dr_unaligned_supported)
2173 && known_eq (nunits, (group_size - gap) * 2)
2174 && known_eq (nunits, group_size)
2175 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2176 != NULL_TREE))
2177 overrun_p = false;
2179 if (overrun_p && !can_overrun_p)
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "Peeling for outer loop is not supported\n");
2184 return false;
2186 int cmp = compare_step_with_zero (vinfo, stmt_info);
2187 if (cmp < 0)
2189 if (single_element_p)
2190 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2191 only correct for single element "interleaving" SLP. */
2192 *memory_access_type = get_negative_load_store_type
2193 (vinfo, stmt_info, vectype, vls_type, 1);
2194 else
2195 *memory_access_type = VMAT_STRIDED_SLP;
2197 else
2199 gcc_assert (!loop_vinfo || cmp > 0);
2200 *memory_access_type = VMAT_CONTIGUOUS;
2204 else
2206 /* We can always handle this case using elementwise accesses,
2207 but see if something more efficient is available. */
2208 *memory_access_type = VMAT_ELEMENTWISE;
2210 /* If there is a gap at the end of the group then these optimizations
2211 would access excess elements in the last iteration. */
2212 bool would_overrun_p = (gap != 0);
2213 /* An overrun is fine if the trailing elements are smaller than the
2214 alignment boundary B. Every vector access will be a multiple of B
2215 and so we are guaranteed to access a non-gap element in the
2216 same B-sized block. */
2217 if (would_overrun_p
2218 && !masked_p
2219 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2220 / vect_get_scalar_dr_size (first_dr_info)))
2221 would_overrun_p = false;
2223 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2224 && (can_overrun_p || !would_overrun_p)
2225 && compare_step_with_zero (vinfo, stmt_info) > 0)
2227 /* First cope with the degenerate case of a single-element
2228 vector. */
2229 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2230 *memory_access_type = VMAT_CONTIGUOUS;
2232 /* Otherwise try using LOAD/STORE_LANES. */
2233 if (*memory_access_type == VMAT_ELEMENTWISE
2234 && (vls_type == VLS_LOAD
2235 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2236 : vect_store_lanes_supported (vectype, group_size,
2237 masked_p)))
2239 *memory_access_type = VMAT_LOAD_STORE_LANES;
2240 overrun_p = would_overrun_p;
2243 /* If that fails, try using permuting loads. */
2244 if (*memory_access_type == VMAT_ELEMENTWISE
2245 && (vls_type == VLS_LOAD
2246 ? vect_grouped_load_supported (vectype, single_element_p,
2247 group_size)
2248 : vect_grouped_store_supported (vectype, group_size)))
2250 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2251 overrun_p = would_overrun_p;
2255          /* As a last resort, try using a gather load or scatter store.
2257 ??? Although the code can handle all group sizes correctly,
2258 it probably isn't a win to use separate strided accesses based
2259 on nearby locations. Or, even if it's a win over scalar code,
2260 it might not be a win over vectorizing at a lower VF, if that
2261 allows us to use contiguous accesses. */
2262 if (*memory_access_type == VMAT_ELEMENTWISE
2263 && single_element_p
2264 && loop_vinfo
2265 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2266 masked_p, gs_info))
2267 *memory_access_type = VMAT_GATHER_SCATTER;
2270 if (*memory_access_type == VMAT_GATHER_SCATTER
2271 || *memory_access_type == VMAT_ELEMENTWISE)
2272 *alignment_support_scheme = dr_unaligned_supported;
2273 else
2274 *alignment_support_scheme
2275 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
2277 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2279 /* STMT is the leader of the group. Check the operands of all the
2280 stmts of the group. */
2281 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2282 while (next_stmt_info)
2284 tree op = vect_get_store_rhs (next_stmt_info);
2285 enum vect_def_type dt;
2286 if (!vect_is_simple_use (op, vinfo, &dt))
2288 if (dump_enabled_p ())
2289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2290 "use not simple.\n");
2291 return false;
2293 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2297 if (overrun_p)
2299 gcc_assert (can_overrun_p);
2300 if (dump_enabled_p ())
2301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2302 "Data access with gaps requires scalar "
2303 "epilogue loop\n");
2304 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2307 return true;
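/* A worked example of the overrun rule above, with hypothetical numbers:
   for a load group with a trailing gap of one 4-byte element and a known
   alignment of 16 bytes, the gap (1) is smaller than 16 / 4 == 4, so each
   vector access is a multiple of the 16-byte boundary and is guaranteed
   to touch a non-gap element in the same 16-byte block; the overrun is
   therefore harmless.  With only 4-byte known alignment the quotient is 1
   and the overrun would instead require peeling for gaps (or another
   access scheme).  */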
2310 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2311 if there is a memory access type that the vectorized form can use,
2312 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2313 or scatters, fill in GS_INFO accordingly. In addition
2314 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2315 the target does not support the alignment scheme.
2317 SLP says whether we're performing SLP rather than loop vectorization.
2318 MASKED_P is true if the statement is conditional on a vectorized mask.
2319 VECTYPE is the vector type that the vectorized statements will use.
2320 NCOPIES is the number of vector statements that will be needed. */
2322 static bool
2323 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2324 tree vectype, slp_tree slp_node,
2325 bool masked_p, vec_load_store_type vls_type,
2326 unsigned int ncopies,
2327 vect_memory_access_type *memory_access_type,
2328 dr_alignment_support *alignment_support_scheme,
2329 gather_scatter_info *gs_info)
2331 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2332 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2333 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2335 *memory_access_type = VMAT_GATHER_SCATTER;
2336 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2337 gcc_unreachable ();
2338 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2339 &gs_info->offset_dt,
2340 &gs_info->offset_vectype))
2342 if (dump_enabled_p ())
2343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2344 "%s index use not simple.\n",
2345 vls_type == VLS_LOAD ? "gather" : "scatter");
2346 return false;
2348 /* Gather-scatter accesses perform only component accesses, alignment
2349 is irrelevant for them. */
2350 *alignment_support_scheme = dr_unaligned_supported;
2352 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2354 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2355 masked_p,
2356 vls_type, memory_access_type,
2357 alignment_support_scheme, gs_info))
2358 return false;
2360 else if (STMT_VINFO_STRIDED_P (stmt_info))
2362 gcc_assert (!slp_node);
2363 if (loop_vinfo
2364 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2365 masked_p, gs_info))
2366 *memory_access_type = VMAT_GATHER_SCATTER;
2367 else
2368 *memory_access_type = VMAT_ELEMENTWISE;
2369 /* Alignment is irrelevant here. */
2370 *alignment_support_scheme = dr_unaligned_supported;
2372 else
2374 int cmp = compare_step_with_zero (vinfo, stmt_info);
2375 if (cmp < 0)
2376 *memory_access_type = get_negative_load_store_type
2377 (vinfo, stmt_info, vectype, vls_type, ncopies);
2378 else if (cmp == 0)
2380 gcc_assert (vls_type == VLS_LOAD);
2381 *memory_access_type = VMAT_INVARIANT;
2383 else
2384 *memory_access_type = VMAT_CONTIGUOUS;
2385 *alignment_support_scheme
2386 = vect_supportable_dr_alignment (vinfo,
2387 STMT_VINFO_DR_INFO (stmt_info), false);
2390 if ((*memory_access_type == VMAT_ELEMENTWISE
2391 || *memory_access_type == VMAT_STRIDED_SLP)
2392 && !nunits.is_constant ())
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2396 "Not using elementwise accesses due to variable "
2397 "vectorization factor.\n");
2398 return false;
2401 if (*alignment_support_scheme == dr_unaligned_unsupported)
2403 if (dump_enabled_p ())
2404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2405 "unsupported unaligned access\n");
2406 return false;
2409 /* FIXME: At the moment the cost model seems to underestimate the
2410 cost of using elementwise accesses. This check preserves the
2411 traditional behavior until that can be fixed. */
2412 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2413 if (!first_stmt_info)
2414 first_stmt_info = stmt_info;
2415 if (*memory_access_type == VMAT_ELEMENTWISE
2416 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2417 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2418 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2419 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2421 if (dump_enabled_p ())
2422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2423 "not falling back to elementwise accesses\n");
2424 return false;
2426 return true;
2429 /* Return true if boolean argument MASK is suitable for vectorizing
2430 conditional operation STMT_INFO. When returning true, store the type
2431 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2432 in *MASK_VECTYPE_OUT. */
2434 static bool
2435 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2436 vect_def_type *mask_dt_out,
2437 tree *mask_vectype_out)
2439 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2441 if (dump_enabled_p ())
2442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2443 "mask argument is not a boolean.\n");
2444 return false;
2447 if (TREE_CODE (mask) != SSA_NAME)
2449 if (dump_enabled_p ())
2450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2451 "mask argument is not an SSA name.\n");
2452 return false;
2455 enum vect_def_type mask_dt;
2456 tree mask_vectype;
2457 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2459 if (dump_enabled_p ())
2460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2461 "mask use not simple.\n");
2462 return false;
2465 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2466 if (!mask_vectype)
2467 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2469 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2471 if (dump_enabled_p ())
2472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2473 "could not find an appropriate vector mask type.\n");
2474 return false;
2477 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2478 TYPE_VECTOR_SUBPARTS (vectype)))
2480 if (dump_enabled_p ())
2481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2482 "vector mask type %T"
2483 " does not match vector data type %T.\n",
2484 mask_vectype, vectype);
2486 return false;
2489 *mask_dt_out = mask_dt;
2490 *mask_vectype_out = mask_vectype;
2491 return true;
2494 /* Return true if stored value RHS is suitable for vectorizing store
2495 statement STMT_INFO. When returning true, store the type of the
2496 definition in *RHS_DT_OUT, the type of the vectorized store value in
2497 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2499 static bool
2500 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2501 slp_tree slp_node, tree rhs,
2502 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2503 vec_load_store_type *vls_type_out)
2505   /* In case this is a store from a constant, make sure
2506      native_encode_expr can handle it.  */
2507 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2509 if (dump_enabled_p ())
2510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2511 "cannot encode constant as a byte sequence.\n");
2512 return false;
2515 enum vect_def_type rhs_dt;
2516 tree rhs_vectype;
2517 slp_tree slp_op;
2518 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
2519 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2521 if (dump_enabled_p ())
2522 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2523 "use not simple.\n");
2524 return false;
2527 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2528 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2530 if (dump_enabled_p ())
2531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2532 "incompatible vector types.\n");
2533 return false;
2536 *rhs_dt_out = rhs_dt;
2537 *rhs_vectype_out = rhs_vectype;
2538 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2539 *vls_type_out = VLS_STORE_INVARIANT;
2540 else
2541 *vls_type_out = VLS_STORE;
2542 return true;
2545 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2546 Note that we support masks with floating-point type, in which case the
2547 floats are interpreted as a bitmask. */
2549 static tree
2550 vect_build_all_ones_mask (vec_info *vinfo,
2551 stmt_vec_info stmt_info, tree masktype)
2553 if (TREE_CODE (masktype) == INTEGER_TYPE)
2554 return build_int_cst (masktype, -1);
2555 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2557 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2558 mask = build_vector_from_val (masktype, mask);
2559 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2561 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2563 REAL_VALUE_TYPE r;
2564 long tmp[6];
2565 for (int j = 0; j < 6; ++j)
2566 tmp[j] = -1;
2567 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2568 tree mask = build_real (TREE_TYPE (masktype), r);
2569 mask = build_vector_from_val (masktype, mask);
2570 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2572 gcc_unreachable ();
2575 /* Build an all-zero merge value of type VECTYPE while vectorizing
2576 STMT_INFO as a gather load. */
2578 static tree
2579 vect_build_zero_merge_argument (vec_info *vinfo,
2580 stmt_vec_info stmt_info, tree vectype)
2582 tree merge;
2583 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2584 merge = build_int_cst (TREE_TYPE (vectype), 0);
2585 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2587 REAL_VALUE_TYPE r;
2588 long tmp[6];
2589 for (int j = 0; j < 6; ++j)
2590 tmp[j] = 0;
2591 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2592 merge = build_real (TREE_TYPE (vectype), r);
2594 else
2595 gcc_unreachable ();
2596 merge = build_vector_from_val (vectype, merge);
2597 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2600 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2601 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2602 the gather load operation. If the load is conditional, MASK is the
2603 unvectorized condition and MASK_DT is its definition type, otherwise
2604 MASK is null. */
2606 static void
2607 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2608 gimple_stmt_iterator *gsi,
2609 gimple **vec_stmt,
2610 gather_scatter_info *gs_info,
2611 tree mask)
2613 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2614 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2615 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2616 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2617 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2618 edge pe = loop_preheader_edge (loop);
2619 enum { NARROW, NONE, WIDEN } modifier;
2620 poly_uint64 gather_off_nunits
2621 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2623 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2624 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2625 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2626 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2627 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2628 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2629 tree scaletype = TREE_VALUE (arglist);
2630 tree real_masktype = masktype;
2631 gcc_checking_assert (types_compatible_p (srctype, rettype)
2632 && (!mask
2633 || TREE_CODE (masktype) == INTEGER_TYPE
2634 || types_compatible_p (srctype, masktype)));
2635 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2636 masktype = truth_type_for (srctype);
2638 tree mask_halftype = masktype;
2639 tree perm_mask = NULL_TREE;
2640 tree mask_perm_mask = NULL_TREE;
2641 if (known_eq (nunits, gather_off_nunits))
2642 modifier = NONE;
2643 else if (known_eq (nunits * 2, gather_off_nunits))
2645 modifier = WIDEN;
2647 /* Currently widening gathers and scatters are only supported for
2648 fixed-length vectors. */
2649 int count = gather_off_nunits.to_constant ();
2650 vec_perm_builder sel (count, count, 1);
2651 for (int i = 0; i < count; ++i)
2652 sel.quick_push (i | (count / 2));
2654 vec_perm_indices indices (sel, 1, count);
2655 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2656 indices);
2658 else if (known_eq (nunits, gather_off_nunits * 2))
2660 modifier = NARROW;
2662 /* Currently narrowing gathers and scatters are only supported for
2663 fixed-length vectors. */
2664 int count = nunits.to_constant ();
2665 vec_perm_builder sel (count, count, 1);
2666 sel.quick_grow (count);
2667 for (int i = 0; i < count; ++i)
2668 sel[i] = i < count / 2 ? i : i + count / 2;
2669 vec_perm_indices indices (sel, 2, count);
2670 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2672 ncopies *= 2;
2674 if (mask && masktype == real_masktype)
2676 for (int i = 0; i < count; ++i)
2677 sel[i] = i | (count / 2);
2678 indices.new_vector (sel, 2, count);
2679 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2681 else if (mask)
2682 mask_halftype = truth_type_for (gs_info->offset_vectype);
2684 else
2685 gcc_unreachable ();
2687 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2688 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2690 tree ptr = fold_convert (ptrtype, gs_info->base);
2691 if (!is_gimple_min_invariant (ptr))
2693 gimple_seq seq;
2694 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2695 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2696 gcc_assert (!new_bb);
2699 tree scale = build_int_cst (scaletype, gs_info->scale);
2701 tree vec_oprnd0 = NULL_TREE;
2702 tree vec_mask = NULL_TREE;
2703 tree src_op = NULL_TREE;
2704 tree mask_op = NULL_TREE;
2705 tree prev_res = NULL_TREE;
2707 if (!mask)
2709 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2710 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2713 auto_vec<tree> vec_oprnds0;
2714 auto_vec<tree> vec_masks;
2715 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2716 modifier == WIDEN ? ncopies / 2 : ncopies,
2717 gs_info->offset, &vec_oprnds0);
2718 if (mask)
2719 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2720 modifier == NARROW ? ncopies / 2 : ncopies,
2721 mask, &vec_masks);
2722 for (int j = 0; j < ncopies; ++j)
2724 tree op, var;
2725 if (modifier == WIDEN && (j & 1))
2726 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2727 perm_mask, stmt_info, gsi);
2728 else
2729 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2731 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2733 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2734 TYPE_VECTOR_SUBPARTS (idxtype)));
2735 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2736 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2737 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2738 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2739 op = var;
2742 if (mask)
2744 if (mask_perm_mask && (j & 1))
2745 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2746 mask_perm_mask, stmt_info, gsi);
2747 else
2749 if (modifier == NARROW)
2751 if ((j & 1) == 0)
2752 vec_mask = vec_masks[j / 2];
2754 else
2755 vec_mask = vec_masks[j];
2757 mask_op = vec_mask;
2758 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2760 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2761 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2762 gcc_assert (known_eq (sub1, sub2));
2763 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2764 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2765 gassign *new_stmt
2766 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2767 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2768 mask_op = var;
2771 if (modifier == NARROW && masktype != real_masktype)
2773 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2774 gassign *new_stmt
2775 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2776 : VEC_UNPACK_LO_EXPR,
2777 mask_op);
2778 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2779 mask_op = var;
2781 src_op = mask_op;
2784 tree mask_arg = mask_op;
2785 if (masktype != real_masktype)
2787 tree utype, optype = TREE_TYPE (mask_op);
2788 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2789 utype = real_masktype;
2790 else
2791 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2792 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2793 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2794 gassign *new_stmt
2795 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2796 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2797 mask_arg = var;
2798 if (!useless_type_conversion_p (real_masktype, utype))
2800 gcc_assert (TYPE_PRECISION (utype)
2801 <= TYPE_PRECISION (real_masktype));
2802 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2803 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2804 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2805 mask_arg = var;
2807 src_op = build_zero_cst (srctype);
2809 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2810 mask_arg, scale);
2812 if (!useless_type_conversion_p (vectype, rettype))
2814 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2815 TYPE_VECTOR_SUBPARTS (rettype)));
2816 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2817 gimple_call_set_lhs (new_stmt, op);
2818 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2819 var = make_ssa_name (vec_dest);
2820 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2821 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2822 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2824 else
2826 var = make_ssa_name (vec_dest, new_stmt);
2827 gimple_call_set_lhs (new_stmt, var);
2828 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2831 if (modifier == NARROW)
2833 if ((j & 1) == 0)
2835 prev_res = var;
2836 continue;
2838 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2839 stmt_info, gsi);
2840 new_stmt = SSA_NAME_DEF_STMT (var);
2843 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2845 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2848 /* Prepare the base and offset in GS_INFO for vectorization.
2849 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2850 to the vectorized offset argument for the first copy of STMT_INFO.
2851 STMT_INFO is the statement described by GS_INFO and LOOP is the
2852 containing loop. */
2854 static void
2855 vect_get_gather_scatter_ops (vec_info *vinfo,
2856 class loop *loop, stmt_vec_info stmt_info,
2857 gather_scatter_info *gs_info,
2858 tree *dataref_ptr, vec<tree> *vec_offset,
2859 unsigned ncopies)
2861 gimple_seq stmts = NULL;
2862 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2863 if (stmts != NULL)
2865 basic_block new_bb;
2866 edge pe = loop_preheader_edge (loop);
2867 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2868 gcc_assert (!new_bb);
2870 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
2871 vec_offset, gs_info->offset_vectype);
2874 /* Prepare to implement a grouped or strided load or store using
2875 the gather load or scatter store operation described by GS_INFO.
2876 STMT_INFO is the load or store statement.
2878 Set *DATAREF_BUMP to the amount that should be added to the base
2879 address after each copy of the vectorized statement. Set *VEC_OFFSET
2880 to an invariant offset vector in which element I has the value
2881 I * DR_STEP / SCALE. */
2883 static void
2884 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2885 loop_vec_info loop_vinfo,
2886 gather_scatter_info *gs_info,
2887 tree *dataref_bump, tree *vec_offset)
2889 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2890 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2892 tree bump = size_binop (MULT_EXPR,
2893 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2894 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2895 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2897 /* The offset given in GS_INFO can have pointer type, so use the element
2898 type of the vector instead. */
2899 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2901 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2902 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2903 ssize_int (gs_info->scale));
2904 step = fold_convert (offset_type, step);
2906 /* Create {0, X, X*2, X*3, ...}. */
2907 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2908 build_zero_cst (offset_type), step);
2909 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
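/* A worked example with hypothetical numbers: for a V4SI access with
   DR_STEP == 32 bytes and SCALE == 4, DATAREF_BUMP is 32 * 4 == 128 bytes
   per copy, X == DR_STEP / SCALE == 8, and VEC_OFFSET is the invariant
   series { 0, 8, 16, 24 } in the offset vector type.  The scalar sketch
   below restates the same computation with plain integers; the helper
   name is purely illustrative.  */

static inline void
strided_offsets_sketch (long dr_step, long scale, unsigned int nunits,
			long *bump, long *offsets)
{
  *bump = dr_step * nunits;
  /* DR_STEP is an exact multiple of SCALE at this point (EXACT_DIV_EXPR
     above), so plain division loses nothing.  */
  long x = dr_step / scale;
  for (unsigned int i = 0; i < nunits; ++i)
    offsets[i] = (long) i * x;
}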
2912 /* Return the amount that should be added to a vector pointer to move
2913 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2914 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2915 vectorization. */
2917 static tree
2918 vect_get_data_ptr_increment (vec_info *vinfo,
2919 dr_vec_info *dr_info, tree aggr_type,
2920 vect_memory_access_type memory_access_type)
2922 if (memory_access_type == VMAT_INVARIANT)
2923 return size_zero_node;
2925 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2926 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2927 if (tree_int_cst_sgn (step) == -1)
2928 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2929 return iv_step;
2932 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2934 static bool
2935 vectorizable_bswap (vec_info *vinfo,
2936 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2937 gimple **vec_stmt, slp_tree slp_node,
2938 slp_tree *slp_op,
2939 tree vectype_in, stmt_vector_for_cost *cost_vec)
2941 tree op, vectype;
2942 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2943 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2944 unsigned ncopies;
2946 op = gimple_call_arg (stmt, 0);
2947 vectype = STMT_VINFO_VECTYPE (stmt_info);
2948 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2950 /* Multiple types in SLP are handled by creating the appropriate number of
2951 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2952 case of SLP. */
2953 if (slp_node)
2954 ncopies = 1;
2955 else
2956 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2958 gcc_assert (ncopies >= 1);
2960 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2961 if (! char_vectype)
2962 return false;
2964 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2965 unsigned word_bytes;
2966 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2967 return false;
2969 /* The encoding uses one stepped pattern for each byte in the word. */
2970 vec_perm_builder elts (num_bytes, word_bytes, 3);
2971 for (unsigned i = 0; i < 3; ++i)
2972 for (unsigned j = 0; j < word_bytes; ++j)
2973 elts.quick_push ((i + 1) * word_bytes - j - 1);
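  /* For example, with a hypothetical V4SI input (WORD_BYTES == 4,
     NUM_BYTES == 16) the explicit elements pushed above (three per
     byte-lane pattern) are the byte indices
     { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8 }, and the stepped encoding
     extends them to { ..., 15, 14, 13, 12 }, i.e. a byte reversal within
     every 4-byte word.  */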
2975 vec_perm_indices indices (elts, 1, num_bytes);
2976 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2977 return false;
2979 if (! vec_stmt)
2981 if (slp_node
2982 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
2984 if (dump_enabled_p ())
2985 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2986 "incompatible vector types for invariants\n");
2987 return false;
2990 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2991 DUMP_VECT_SCOPE ("vectorizable_bswap");
2992 if (! slp_node)
2994 record_stmt_cost (cost_vec,
2995 1, vector_stmt, stmt_info, 0, vect_prologue);
2996 record_stmt_cost (cost_vec,
2997 ncopies, vec_perm, stmt_info, 0, vect_body);
2999 return true;
3002 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3004 /* Transform. */
3005 vec<tree> vec_oprnds = vNULL;
3006 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3007 op, &vec_oprnds);
3008   /* Arguments are ready.  Create the new vector stmt.  */
3009 unsigned i;
3010 tree vop;
3011 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3013 gimple *new_stmt;
3014 tree tem = make_ssa_name (char_vectype);
3015 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3016 char_vectype, vop));
3017 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3018 tree tem2 = make_ssa_name (char_vectype);
3019 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3020 tem, tem, bswap_vconst);
3021 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3022 tem = make_ssa_name (vectype);
3023 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3024 vectype, tem2));
3025 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3026 if (slp_node)
3027 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3028 else
3029 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3032 if (!slp_node)
3033 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3035 vec_oprnds.release ();
3036 return true;
3039 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3040 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3041 in a single step. On success, store the binary pack code in
3042 *CONVERT_CODE. */
3044 static bool
3045 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3046 tree_code *convert_code)
3048 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3049 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3050 return false;
3052 tree_code code;
3053 int multi_step_cvt = 0;
3054 auto_vec <tree, 8> interm_types;
3055 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3056 &code, &multi_step_cvt, &interm_types)
3057 || multi_step_cvt)
3058 return false;
3060 *convert_code = code;
3061 return true;
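/* For example, the DI->SI case described for vectorizable_call below is
   handled this way: two VnDI results are produced and then packed into a
   single SI vector, so *CONVERT_CODE is typically the pack operation
   (e.g. VEC_PACK_TRUNC_EXPR) chosen by supportable_narrowing_operation.  */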
3064 /* Function vectorizable_call.
3066 Check if STMT_INFO performs a function call that can be vectorized.
3067 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3068 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3069 Return true if STMT_INFO is vectorizable in this way. */
3071 static bool
3072 vectorizable_call (vec_info *vinfo,
3073 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3074 gimple **vec_stmt, slp_tree slp_node,
3075 stmt_vector_for_cost *cost_vec)
3077 gcall *stmt;
3078 tree vec_dest;
3079 tree scalar_dest;
3080 tree op;
3081 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3082 tree vectype_out, vectype_in;
3083 poly_uint64 nunits_in;
3084 poly_uint64 nunits_out;
3085 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3086 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3087 tree fndecl, new_temp, rhs_type;
3088 enum vect_def_type dt[4]
3089 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3090 vect_unknown_def_type };
3091 tree vectypes[ARRAY_SIZE (dt)] = {};
3092 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3093 int ndts = ARRAY_SIZE (dt);
3094 int ncopies, j;
3095 auto_vec<tree, 8> vargs;
3096 auto_vec<tree, 8> orig_vargs;
3097 enum { NARROW, NONE, WIDEN } modifier;
3098 size_t i, nargs;
3099 tree lhs;
3101 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3102 return false;
3104 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3105 && ! vec_stmt)
3106 return false;
3108 /* Is STMT_INFO a vectorizable call? */
3109 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3110 if (!stmt)
3111 return false;
3113 if (gimple_call_internal_p (stmt)
3114 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3115 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3116 /* Handled by vectorizable_load and vectorizable_store. */
3117 return false;
3119 if (gimple_call_lhs (stmt) == NULL_TREE
3120 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3121 return false;
3123 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3125 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3127 /* Process function arguments. */
3128 rhs_type = NULL_TREE;
3129 vectype_in = NULL_TREE;
3130 nargs = gimple_call_num_args (stmt);
3132   /* Bail out if the function has more than four arguments; we do not have
3133      interesting builtin functions to vectorize with more than two arguments,
3134      except for fma.  A call with no arguments is not interesting either.  */
3135 if (nargs == 0 || nargs > 4)
3136 return false;
3138 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3139 combined_fn cfn = gimple_call_combined_fn (stmt);
3140 if (cfn == CFN_GOMP_SIMD_LANE)
3142 nargs = 0;
3143 rhs_type = unsigned_type_node;
3146 int mask_opno = -1;
3147 if (internal_fn_p (cfn))
3148 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3150 for (i = 0; i < nargs; i++)
3152 if ((int) i == mask_opno)
3154 op = gimple_call_arg (stmt, i);
3155 if (!vect_check_scalar_mask (vinfo,
3156 stmt_info, op, &dt[i], &vectypes[i]))
3157 return false;
3158 continue;
3161 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3162 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3164 if (dump_enabled_p ())
3165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3166 "use not simple.\n");
3167 return false;
3170 /* We can only handle calls with arguments of the same type. */
3171 if (rhs_type
3172 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3174 if (dump_enabled_p ())
3175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3176 "argument types differ.\n");
3177 return false;
3179 if (!rhs_type)
3180 rhs_type = TREE_TYPE (op);
3182 if (!vectype_in)
3183 vectype_in = vectypes[i];
3184 else if (vectypes[i]
3185 && !types_compatible_p (vectypes[i], vectype_in))
3187 if (dump_enabled_p ())
3188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3189 "argument vector types differ.\n");
3190 return false;
3193 /* If all arguments are external or constant defs, infer the vector type
3194 from the scalar type. */
3195 if (!vectype_in)
3196 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3197 if (vec_stmt)
3198 gcc_assert (vectype_in);
3199 if (!vectype_in)
3201 if (dump_enabled_p ())
3202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3203 "no vectype for scalar type %T\n", rhs_type);
3205 return false;
3207 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3208 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3209 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3210 by a pack of the two vectors into an SI vector. We would need
3211 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3212 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3214 if (dump_enabled_p ())
3215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3216 "mismatched vector sizes %T and %T\n",
3217 vectype_in, vectype_out);
3218 return false;
3221 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3222 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3224 if (dump_enabled_p ())
3225 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3226 "mixed mask and nonmask vector types\n");
3227 return false;
3230 /* FORNOW */
3231 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3232 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3233 if (known_eq (nunits_in * 2, nunits_out))
3234 modifier = NARROW;
3235 else if (known_eq (nunits_out, nunits_in))
3236 modifier = NONE;
3237 else if (known_eq (nunits_out * 2, nunits_in))
3238 modifier = WIDEN;
3239 else
3240 return false;
3242 /* We only handle functions that do not read or clobber memory. */
3243 if (gimple_vuse (stmt))
3245 if (dump_enabled_p ())
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3247 "function reads from or writes to memory.\n");
3248 return false;
3251 /* For now, we only vectorize functions if a target specific builtin
3252 is available. TODO -- in some cases, it might be profitable to
3253 insert the calls for pieces of the vector, in order to be able
3254 to vectorize other operations in the loop. */
3255 fndecl = NULL_TREE;
3256 internal_fn ifn = IFN_LAST;
3257 tree callee = gimple_call_fndecl (stmt);
3259 /* First try using an internal function. */
3260 tree_code convert_code = ERROR_MARK;
3261 if (cfn != CFN_LAST
3262 && (modifier == NONE
3263 || (modifier == NARROW
3264 && simple_integer_narrowing (vectype_out, vectype_in,
3265 &convert_code))))
3266 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3267 vectype_in);
3269 /* If that fails, try asking for a target-specific built-in function. */
3270 if (ifn == IFN_LAST)
3272 if (cfn != CFN_LAST)
3273 fndecl = targetm.vectorize.builtin_vectorized_function
3274 (cfn, vectype_out, vectype_in);
3275 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3276 fndecl = targetm.vectorize.builtin_md_vectorized_function
3277 (callee, vectype_out, vectype_in);
3280 if (ifn == IFN_LAST && !fndecl)
3282 if (cfn == CFN_GOMP_SIMD_LANE
3283 && !slp_node
3284 && loop_vinfo
3285 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3286 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3287 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3288 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3290 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3291 { 0, 1, 2, ... vf - 1 } vector. */
3292 gcc_assert (nargs == 0);
3294 else if (modifier == NONE
3295 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3296 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3297 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3298 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3299 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3300 slp_op, vectype_in, cost_vec);
3301 else
3303 if (dump_enabled_p ())
3304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3305 "function is not vectorizable.\n");
3306 return false;
3310 if (slp_node)
3311 ncopies = 1;
3312 else if (modifier == NARROW && ifn == IFN_LAST)
3313 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3314 else
3315 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3317 /* Sanity check: make sure that at least one copy of the vectorized stmt
3318 needs to be generated. */
3319 gcc_assert (ncopies >= 1);
3321 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3322 if (!vec_stmt) /* transformation not required. */
3324 if (slp_node)
3325 for (i = 0; i < nargs; ++i)
3326 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3328 if (dump_enabled_p ())
3329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3330 "incompatible vector types for invariants\n");
3331 return false;
3333 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3334 DUMP_VECT_SCOPE ("vectorizable_call");
3335 vect_model_simple_cost (vinfo, stmt_info,
3336 ncopies, dt, ndts, slp_node, cost_vec);
3337 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3338 record_stmt_cost (cost_vec, ncopies / 2,
3339 vec_promote_demote, stmt_info, 0, vect_body);
3341 if (loop_vinfo && mask_opno >= 0)
3343 unsigned int nvectors = (slp_node
3344 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3345 : ncopies);
3346 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3347 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3348 vectype_out, scalar_mask);
3350 return true;
3353 /* Transform. */
3355 if (dump_enabled_p ())
3356 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3358 /* Handle def. */
3359 scalar_dest = gimple_call_lhs (stmt);
3360 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3362 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3364 if (modifier == NONE || ifn != IFN_LAST)
3366 tree prev_res = NULL_TREE;
3367 vargs.safe_grow (nargs);
3368 orig_vargs.safe_grow (nargs);
3369 auto_vec<vec<tree> > vec_defs (nargs);
3370 for (j = 0; j < ncopies; ++j)
3372 /* Build argument list for the vectorized call. */
3373 if (slp_node)
3375 vec<tree> vec_oprnds0;
3377 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3378 vec_oprnds0 = vec_defs[0];
3380 /* Arguments are ready. Create the new vector stmt. */
3381 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3383 size_t k;
3384 for (k = 0; k < nargs; k++)
3386 vec<tree> vec_oprndsk = vec_defs[k];
3387 vargs[k] = vec_oprndsk[i];
3389 gimple *new_stmt;
3390 if (modifier == NARROW)
3392 /* We don't define any narrowing conditional functions
3393 at present. */
3394 gcc_assert (mask_opno < 0);
3395 tree half_res = make_ssa_name (vectype_in);
3396 gcall *call
3397 = gimple_build_call_internal_vec (ifn, vargs);
3398 gimple_call_set_lhs (call, half_res);
3399 gimple_call_set_nothrow (call, true);
3400 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3401 if ((i & 1) == 0)
3403 prev_res = half_res;
3404 continue;
3406 new_temp = make_ssa_name (vec_dest);
3407 new_stmt = gimple_build_assign (new_temp, convert_code,
3408 prev_res, half_res);
3409 vect_finish_stmt_generation (vinfo, stmt_info,
3410 new_stmt, gsi);
3412 else
3414 if (mask_opno >= 0 && masked_loop_p)
3416 unsigned int vec_num = vec_oprnds0.length ();
3417 /* Always true for SLP. */
3418 gcc_assert (ncopies == 1);
3419 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3420 vectype_out, i);
3421 vargs[mask_opno] = prepare_load_store_mask
3422 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3425 gcall *call;
3426 if (ifn != IFN_LAST)
3427 call = gimple_build_call_internal_vec (ifn, vargs);
3428 else
3429 call = gimple_build_call_vec (fndecl, vargs);
3430 new_temp = make_ssa_name (vec_dest, call);
3431 gimple_call_set_lhs (call, new_temp);
3432 gimple_call_set_nothrow (call, true);
3433 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3434 new_stmt = call;
3436 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3438 continue;
3441 for (i = 0; i < nargs; i++)
3443 op = gimple_call_arg (stmt, i);
3444 if (j == 0)
3446 vec_defs.quick_push (vNULL);
3447 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3448 op, &vec_defs[i]);
3450 orig_vargs[i] = vargs[i] = vec_defs[i][j];
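              /* Under a fully-masked loop, AND the call's mask argument with
                 the loop mask that covers copy J of the statement.  */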
3453 if (mask_opno >= 0 && masked_loop_p)
3455 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3456 vectype_out, j);
3457 vargs[mask_opno]
3458 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3459 vargs[mask_opno], gsi);
3462 gimple *new_stmt;
3463 if (cfn == CFN_GOMP_SIMD_LANE)
3465 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3466 tree new_var
3467 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3468 gimple *init_stmt = gimple_build_assign (new_var, cst);
3469 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3470 new_temp = make_ssa_name (vec_dest);
3471 new_stmt = gimple_build_assign (new_temp, new_var);
3472 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3474 else if (modifier == NARROW)
3476 /* We don't define any narrowing conditional functions at
3477 present. */
3478 gcc_assert (mask_opno < 0);
3479 tree half_res = make_ssa_name (vectype_in);
3480 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3481 gimple_call_set_lhs (call, half_res);
3482 gimple_call_set_nothrow (call, true);
3483 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3484 if ((j & 1) == 0)
3486 prev_res = half_res;
3487 continue;
3489 new_temp = make_ssa_name (vec_dest);
3490 new_stmt = gimple_build_assign (new_temp, convert_code,
3491 prev_res, half_res);
3492 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3494 else
3496 gcall *call;
3497 if (ifn != IFN_LAST)
3498 call = gimple_build_call_internal_vec (ifn, vargs);
3499 else
3500 call = gimple_build_call_vec (fndecl, vargs);
3501 new_temp = make_ssa_name (vec_dest, call);
3502 gimple_call_set_lhs (call, new_temp);
3503 gimple_call_set_nothrow (call, true);
3504 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3505 new_stmt = call;
3508 if (j == (modifier == NARROW ? 1 : 0))
3509 *vec_stmt = new_stmt;
3510 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3512 for (i = 0; i < nargs; i++)
3514 vec<tree> vec_oprndsi = vec_defs[i];
3515 vec_oprndsi.release ();
3518 else if (modifier == NARROW)
3520 auto_vec<vec<tree> > vec_defs (nargs);
3521 /* We don't define any narrowing conditional functions at present. */
3522 gcc_assert (mask_opno < 0);
3523 for (j = 0; j < ncopies; ++j)
3525 /* Build argument list for the vectorized call. */
3526 if (j == 0)
3527 vargs.create (nargs * 2);
3528 else
3529 vargs.truncate (0);
3531 if (slp_node)
3533 vec<tree> vec_oprnds0;
3535 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3536 vec_oprnds0 = vec_defs[0];
3538 /* Arguments are ready. Create the new vector stmt. */
3539 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3541 size_t k;
3542 vargs.truncate (0);
3543 for (k = 0; k < nargs; k++)
3545 vec<tree> vec_oprndsk = vec_defs[k];
3546 vargs.quick_push (vec_oprndsk[i]);
3547 vargs.quick_push (vec_oprndsk[i + 1]);
3549 gcall *call;
3550 if (ifn != IFN_LAST)
3551 call = gimple_build_call_internal_vec (ifn, vargs);
3552 else
3553 call = gimple_build_call_vec (fndecl, vargs);
3554 new_temp = make_ssa_name (vec_dest, call);
3555 gimple_call_set_lhs (call, new_temp);
3556 gimple_call_set_nothrow (call, true);
3557 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3558 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3560 continue;
3563 for (i = 0; i < nargs; i++)
3565 op = gimple_call_arg (stmt, i);
3566 if (j == 0)
3568 vec_defs.quick_push (vNULL);
3569 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3570 op, &vec_defs[i], vectypes[i]);
3572 vec_oprnd0 = vec_defs[i][2*j];
3573 vec_oprnd1 = vec_defs[i][2*j+1];
3575 vargs.quick_push (vec_oprnd0);
3576 vargs.quick_push (vec_oprnd1);
3579 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3580 new_temp = make_ssa_name (vec_dest, new_stmt);
3581 gimple_call_set_lhs (new_stmt, new_temp);
3582 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3584 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3587 if (!slp_node)
3588 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3590 for (i = 0; i < nargs; i++)
3592 vec<tree> vec_oprndsi = vec_defs[i];
3593 vec_oprndsi.release ();
3596 else
3597 /* No current target implements this case. */
3598 return false;
3600 vargs.release ();
3602 /* The call in STMT might prevent it from being removed in dce.
3603 We however cannot remove it here, due to the way the ssa name
3604 it defines is mapped to the new definition. So just replace the
3605 rhs of the statement with something harmless. */
3607 if (slp_node)
3608 return true;
3610 stmt_info = vect_orig_stmt (stmt_info);
3611 lhs = gimple_get_lhs (stmt_info->stmt);
3613 gassign *new_stmt
3614 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3615 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3617 return true;
3621 struct simd_call_arg_info
3623 tree vectype;
3624 tree op;
3625 HOST_WIDE_INT linear_step;
3626 enum vect_def_type dt;
3627 unsigned int align;
3628 bool simd_lane_linear;
3631 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3632 is linear within simd lane (but not within whole loop), note it in
3633 *ARGINFO. */
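   For example, a definition chain like
     _1 = GOMP_SIMD_LANE (simduid);
     _2 = (sizetype) _1;
     _3 = _2 * 4;
     op = base p+ _3;
   makes OP linear within the simd lane with base BASE and step 4.  */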
3635 static void
3636 vect_simd_lane_linear (tree op, class loop *loop,
3637 struct simd_call_arg_info *arginfo)
3639 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3641 if (!is_gimple_assign (def_stmt)
3642 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3643 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3644 return;
3646 tree base = gimple_assign_rhs1 (def_stmt);
3647 HOST_WIDE_INT linear_step = 0;
3648 tree v = gimple_assign_rhs2 (def_stmt);
3649 while (TREE_CODE (v) == SSA_NAME)
3651 tree t;
3652 def_stmt = SSA_NAME_DEF_STMT (v);
3653 if (is_gimple_assign (def_stmt))
3654 switch (gimple_assign_rhs_code (def_stmt))
3656 case PLUS_EXPR:
3657 t = gimple_assign_rhs2 (def_stmt);
3658 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3659 return;
3660 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3661 v = gimple_assign_rhs1 (def_stmt);
3662 continue;
3663 case MULT_EXPR:
3664 t = gimple_assign_rhs2 (def_stmt);
3665 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3666 return;
3667 linear_step = tree_to_shwi (t);
3668 v = gimple_assign_rhs1 (def_stmt);
3669 continue;
3670 CASE_CONVERT:
3671 t = gimple_assign_rhs1 (def_stmt);
3672 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3673 || (TYPE_PRECISION (TREE_TYPE (v))
3674 < TYPE_PRECISION (TREE_TYPE (t))))
3675 return;
3676 if (!linear_step)
3677 linear_step = 1;
3678 v = t;
3679 continue;
3680 default:
3681 return;
3683 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3684 && loop->simduid
3685 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3686 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3687 == loop->simduid))
3689 if (!linear_step)
3690 linear_step = 1;
3691 arginfo->linear_step = linear_step;
3692 arginfo->op = base;
3693 arginfo->simd_lane_linear = true;
3694 return;
3699 /* Return the number of elements in vector type VECTYPE, which is associated
3700 with a SIMD clone. At present these vectors always have a constant
3701 length. */
3703 static unsigned HOST_WIDE_INT
3704 simd_clone_subparts (tree vectype)
3706 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3709 /* Function vectorizable_simd_clone_call.
3711 Check if STMT_INFO performs a function call that can be vectorized
3712 by calling a simd clone of the function.
3713 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3714 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3715 Return true if STMT_INFO is vectorizable in this way. */
3717 static bool
3718 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3719 gimple_stmt_iterator *gsi,
3720 gimple **vec_stmt, slp_tree slp_node,
3721 stmt_vector_for_cost *)
3723 tree vec_dest;
3724 tree scalar_dest;
3725 tree op, type;
3726 tree vec_oprnd0 = NULL_TREE;
3727 tree vectype;
3728 unsigned int nunits;
3729 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3730 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3731 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3732 tree fndecl, new_temp;
3733 int ncopies, j;
3734 auto_vec<simd_call_arg_info> arginfo;
3735 vec<tree> vargs = vNULL;
3736 size_t i, nargs;
3737 tree lhs, rtype, ratype;
3738 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3740 /* Is STMT a vectorizable call? */
3741 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3742 if (!stmt)
3743 return false;
3745 fndecl = gimple_call_fndecl (stmt);
3746 if (fndecl == NULL_TREE)
3747 return false;
3749 struct cgraph_node *node = cgraph_node::get (fndecl);
3750 if (node == NULL || node->simd_clones == NULL)
3751 return false;
3753 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3754 return false;
3756 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3757 && ! vec_stmt)
3758 return false;
3760 if (gimple_call_lhs (stmt)
3761 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3762 return false;
3764 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3766 vectype = STMT_VINFO_VECTYPE (stmt_info);
3768 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3769 return false;
3771 /* FORNOW */
3772 if (slp_node)
3773 return false;
3775 /* Process function arguments. */
3776 nargs = gimple_call_num_args (stmt);
3778 /* Bail out if the function has zero arguments. */
3779 if (nargs == 0)
3780 return false;
3782 arginfo.reserve (nargs, true);
3784 for (i = 0; i < nargs; i++)
3786 simd_call_arg_info thisarginfo;
3787 affine_iv iv;
3789 thisarginfo.linear_step = 0;
3790 thisarginfo.align = 0;
3791 thisarginfo.op = NULL_TREE;
3792 thisarginfo.simd_lane_linear = false;
3794 op = gimple_call_arg (stmt, i);
3795 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3796 &thisarginfo.vectype)
3797 || thisarginfo.dt == vect_uninitialized_def)
3799 if (dump_enabled_p ())
3800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3801 "use not simple.\n");
3802 return false;
3805 if (thisarginfo.dt == vect_constant_def
3806 || thisarginfo.dt == vect_external_def)
3807 gcc_assert (thisarginfo.vectype == NULL_TREE);
3808 else
3810 gcc_assert (thisarginfo.vectype != NULL_TREE);
3811 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3813 if (dump_enabled_p ())
3814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3815 "vector mask arguments are not supported\n");
3816 return false;
3820 /* For linear arguments, the analyze phase should have saved
3821 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3822 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3823 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3825 gcc_assert (vec_stmt);
3826 thisarginfo.linear_step
3827 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3828 thisarginfo.op
3829 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3830 thisarginfo.simd_lane_linear
3831 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3832 == boolean_true_node);
3833 /* If the loop has been peeled for alignment, we need to adjust it. */
3834 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3835 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3836 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3838 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3839 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3840 tree opt = TREE_TYPE (thisarginfo.op);
3841 bias = fold_convert (TREE_TYPE (step), bias);
3842 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3843 thisarginfo.op
3844 = fold_build2 (POINTER_TYPE_P (opt)
3845 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3846 thisarginfo.op, bias);
3849 else if (!vec_stmt
3850 && thisarginfo.dt != vect_constant_def
3851 && thisarginfo.dt != vect_external_def
3852 && loop_vinfo
3853 && TREE_CODE (op) == SSA_NAME
3854 && simple_iv (loop, loop_containing_stmt (stmt), op,
3855 &iv, false)
3856 && tree_fits_shwi_p (iv.step))
3858 thisarginfo.linear_step = tree_to_shwi (iv.step);
3859 thisarginfo.op = iv.base;
3861 else if ((thisarginfo.dt == vect_constant_def
3862 || thisarginfo.dt == vect_external_def)
3863 && POINTER_TYPE_P (TREE_TYPE (op)))
3864 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3865 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3866 linear too. */
3867 if (POINTER_TYPE_P (TREE_TYPE (op))
3868 && !thisarginfo.linear_step
3869 && !vec_stmt
3870 && thisarginfo.dt != vect_constant_def
3871 && thisarginfo.dt != vect_external_def
3872 && loop_vinfo
3873 && !slp_node
3874 && TREE_CODE (op) == SSA_NAME)
3875 vect_simd_lane_linear (op, loop, &thisarginfo);
3877 arginfo.quick_push (thisarginfo);
3880 unsigned HOST_WIDE_INT vf;
3881 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3883 if (dump_enabled_p ())
3884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3885 "not considering SIMD clones; not yet supported"
3886 " for variable-width vectors.\n");
3887 return false;
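   /* Unless a clone was already chosen during analysis (recorded in
      STMT_VINFO_SIMD_CLONE_INFO), pick the clone with the lowest "badness":
      penalize clones whose simdlen is smaller than the vectorization factor,
      inbranch clones, clones the target rates as expensive, arguments that
      need extra setup, and under-aligned arguments.  */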
3890 unsigned int badness = 0;
3891 struct cgraph_node *bestn = NULL;
3892 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3893 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3894 else
3895 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3896 n = n->simdclone->next_clone)
3898 unsigned int this_badness = 0;
3899 if (n->simdclone->simdlen > vf
3900 || n->simdclone->nargs != nargs)
3901 continue;
3902 if (n->simdclone->simdlen < vf)
3903 this_badness += (exact_log2 (vf)
3904 - exact_log2 (n->simdclone->simdlen)) * 1024;
3905 if (n->simdclone->inbranch)
3906 this_badness += 2048;
3907 int target_badness = targetm.simd_clone.usable (n);
3908 if (target_badness < 0)
3909 continue;
3910 this_badness += target_badness * 512;
3911 /* FORNOW: Have to add code to add the mask argument. */
3912 if (n->simdclone->inbranch)
3913 continue;
3914 for (i = 0; i < nargs; i++)
3916 switch (n->simdclone->args[i].arg_type)
3918 case SIMD_CLONE_ARG_TYPE_VECTOR:
3919 if (!useless_type_conversion_p
3920 (n->simdclone->args[i].orig_type,
3921 TREE_TYPE (gimple_call_arg (stmt, i))))
3922 i = -1;
3923 else if (arginfo[i].dt == vect_constant_def
3924 || arginfo[i].dt == vect_external_def
3925 || arginfo[i].linear_step)
3926 this_badness += 64;
3927 break;
3928 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3929 if (arginfo[i].dt != vect_constant_def
3930 && arginfo[i].dt != vect_external_def)
3931 i = -1;
3932 break;
3933 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3934 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3935 if (arginfo[i].dt == vect_constant_def
3936 || arginfo[i].dt == vect_external_def
3937 || (arginfo[i].linear_step
3938 != n->simdclone->args[i].linear_step))
3939 i = -1;
3940 break;
3941 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3942 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3943 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3944 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3945 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3946 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3947 /* FORNOW */
3948 i = -1;
3949 break;
3950 case SIMD_CLONE_ARG_TYPE_MASK:
3951 gcc_unreachable ();
3953 if (i == (size_t) -1)
3954 break;
3955 if (n->simdclone->args[i].alignment > arginfo[i].align)
3957 i = -1;
3958 break;
3960 if (arginfo[i].align)
3961 this_badness += (exact_log2 (arginfo[i].align)
3962 - exact_log2 (n->simdclone->args[i].alignment));
3964 if (i == (size_t) -1)
3965 continue;
3966 if (bestn == NULL || this_badness < badness)
3968 bestn = n;
3969 badness = this_badness;
3973 if (bestn == NULL)
3974 return false;
3976 for (i = 0; i < nargs; i++)
3977 if ((arginfo[i].dt == vect_constant_def
3978 || arginfo[i].dt == vect_external_def)
3979 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3981 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
3982 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
3983 slp_node);
3984 if (arginfo[i].vectype == NULL
3985 || (simd_clone_subparts (arginfo[i].vectype)
3986 > bestn->simdclone->simdlen))
3987 return false;
3990 fndecl = bestn->decl;
3991 nunits = bestn->simdclone->simdlen;
3992 ncopies = vf / nunits;
3994 /* If the function isn't const, only allow it in simd loops where the user
3995 has asserted that at least nunits consecutive iterations can be
3996 performed using SIMD instructions. */
3997 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3998 && gimple_vuse (stmt))
3999 return false;
4001 /* Sanity check: make sure that at least one copy of the vectorized stmt
4002 needs to be generated. */
4003 gcc_assert (ncopies >= 1);
4005 if (!vec_stmt) /* transformation not required. */
4007 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4008 for (i = 0; i < nargs; i++)
4009 if ((bestn->simdclone->args[i].arg_type
4010 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4011 || (bestn->simdclone->args[i].arg_type
4012 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4014 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4015 + 1);
4016 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4017 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4018 ? size_type_node : TREE_TYPE (arginfo[i].op);
4019 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4020 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4021 tree sll = arginfo[i].simd_lane_linear
4022 ? boolean_true_node : boolean_false_node;
4023 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4025 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4026 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4027 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4028 dt, slp_node, cost_vec); */
4029 return true;
4032 /* Transform. */
4034 if (dump_enabled_p ())
4035 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4037 /* Handle def. */
4038 scalar_dest = gimple_call_lhs (stmt);
4039 vec_dest = NULL_TREE;
4040 rtype = NULL_TREE;
4041 ratype = NULL_TREE;
4042 if (scalar_dest)
4044 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4045 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4046 if (TREE_CODE (rtype) == ARRAY_TYPE)
4048 ratype = rtype;
4049 rtype = TREE_TYPE (ratype);
4053 auto_vec<vec<tree> > vec_oprnds;
4054 auto_vec<unsigned> vec_oprnds_i;
4055 vec_oprnds.safe_grow_cleared (nargs);
4056 vec_oprnds_i.safe_grow_cleared (nargs);
4057 for (j = 0; j < ncopies; ++j)
4059 /* Build argument list for the vectorized call. */
4060 if (j == 0)
4061 vargs.create (nargs);
4062 else
4063 vargs.truncate (0);
4065 for (i = 0; i < nargs; i++)
4067 unsigned int k, l, m, o;
4068 tree atype;
4069 op = gimple_call_arg (stmt, i);
4070 switch (bestn->simdclone->args[i].arg_type)
4072 case SIMD_CLONE_ARG_TYPE_VECTOR:
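            /* The clone may take this vector argument in a narrower or wider
               vector type than the loop uses for the operand: extract pieces
               with BIT_FIELD_REF when the clone's type is narrower, or glue
               several defs together with a CONSTRUCTOR (or a VIEW_CONVERT_EXPR
               when the element counts already match) when it is wider.  */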
4073 atype = bestn->simdclone->args[i].vector_type;
4074 o = nunits / simd_clone_subparts (atype);
4075 for (m = j * o; m < (j + 1) * o; m++)
4077 if (simd_clone_subparts (atype)
4078 < simd_clone_subparts (arginfo[i].vectype))
4080 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4081 k = (simd_clone_subparts (arginfo[i].vectype)
4082 / simd_clone_subparts (atype));
4083 gcc_assert ((k & (k - 1)) == 0);
4084 if (m == 0)
4086 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4087 ncopies * o / k, op,
4088 &vec_oprnds[i]);
4089 vec_oprnds_i[i] = 0;
4090 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4092 else
4094 vec_oprnd0 = arginfo[i].op;
4095 if ((m & (k - 1)) == 0)
4096 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4098 arginfo[i].op = vec_oprnd0;
4099 vec_oprnd0
4100 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4101 bitsize_int (prec),
4102 bitsize_int ((m & (k - 1)) * prec));
4103 gassign *new_stmt
4104 = gimple_build_assign (make_ssa_name (atype),
4105 vec_oprnd0);
4106 vect_finish_stmt_generation (vinfo, stmt_info,
4107 new_stmt, gsi);
4108 vargs.safe_push (gimple_assign_lhs (new_stmt));
4110 else
4112 k = (simd_clone_subparts (atype)
4113 / simd_clone_subparts (arginfo[i].vectype));
4114 gcc_assert ((k & (k - 1)) == 0);
4115 vec<constructor_elt, va_gc> *ctor_elts;
4116 if (k != 1)
4117 vec_alloc (ctor_elts, k);
4118 else
4119 ctor_elts = NULL;
4120 for (l = 0; l < k; l++)
4122 if (m == 0 && l == 0)
4124 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4125 k * o * ncopies,
4126 op,
4127 &vec_oprnds[i]);
4128 vec_oprnds_i[i] = 0;
4129 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4131 else
4132 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4133 arginfo[i].op = vec_oprnd0;
4134 if (k == 1)
4135 break;
4136 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4137 vec_oprnd0);
4139 if (k == 1)
4140 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4141 atype))
4143 vec_oprnd0
4144 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4145 gassign *new_stmt
4146 = gimple_build_assign (make_ssa_name (atype),
4147 vec_oprnd0);
4148 vect_finish_stmt_generation (vinfo, stmt_info,
4149 new_stmt, gsi);
4150 vargs.safe_push (gimple_assign_lhs (new_stmt));
4152 else
4153 vargs.safe_push (vec_oprnd0);
4154 else
4156 vec_oprnd0 = build_constructor (atype, ctor_elts);
4157 gassign *new_stmt
4158 = gimple_build_assign (make_ssa_name (atype),
4159 vec_oprnd0);
4160 vect_finish_stmt_generation (vinfo, stmt_info,
4161 new_stmt, gsi);
4162 vargs.safe_push (gimple_assign_lhs (new_stmt));
4166 break;
4167 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4168 vargs.safe_push (op);
4169 break;
4170 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4171 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
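            /* A linear argument advances by LINEAR_STEP per scalar iteration.
               On the first copy, create an IV in the loop header that starts
               at the recorded base and is bumped by LINEAR_STEP * NUNITS
               * NCOPIES each loop iteration; copy J then passes that IV plus
               J * NUNITS * LINEAR_STEP.  Arguments that are linear only
               within the simd lane just pass the base itself.  */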
4172 if (j == 0)
4174 gimple_seq stmts;
4175 arginfo[i].op
4176 = force_gimple_operand (unshare_expr (arginfo[i].op),
4177 &stmts, true, NULL_TREE);
4178 if (stmts != NULL)
4180 basic_block new_bb;
4181 edge pe = loop_preheader_edge (loop);
4182 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4183 gcc_assert (!new_bb);
4185 if (arginfo[i].simd_lane_linear)
4187 vargs.safe_push (arginfo[i].op);
4188 break;
4190 tree phi_res = copy_ssa_name (op);
4191 gphi *new_phi = create_phi_node (phi_res, loop->header);
4192 add_phi_arg (new_phi, arginfo[i].op,
4193 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4194 enum tree_code code
4195 = POINTER_TYPE_P (TREE_TYPE (op))
4196 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4197 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4198 ? sizetype : TREE_TYPE (op);
4199 widest_int cst
4200 = wi::mul (bestn->simdclone->args[i].linear_step,
4201 ncopies * nunits);
4202 tree tcst = wide_int_to_tree (type, cst);
4203 tree phi_arg = copy_ssa_name (op);
4204 gassign *new_stmt
4205 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4206 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4207 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4208 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4209 UNKNOWN_LOCATION);
4210 arginfo[i].op = phi_res;
4211 vargs.safe_push (phi_res);
4213 else
4215 enum tree_code code
4216 = POINTER_TYPE_P (TREE_TYPE (op))
4217 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4218 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4219 ? sizetype : TREE_TYPE (op);
4220 widest_int cst
4221 = wi::mul (bestn->simdclone->args[i].linear_step,
4222 j * nunits);
4223 tree tcst = wide_int_to_tree (type, cst);
4224 new_temp = make_ssa_name (TREE_TYPE (op));
4225 gassign *new_stmt
4226 = gimple_build_assign (new_temp, code,
4227 arginfo[i].op, tcst);
4228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4229 vargs.safe_push (new_temp);
4231 break;
4232 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4233 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4234 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4235 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4236 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4237 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4238 default:
4239 gcc_unreachable ();
4243 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4244 if (vec_dest)
4246 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4247 if (ratype)
4248 new_temp = create_tmp_var (ratype);
4249 else if (useless_type_conversion_p (vectype, rtype))
4250 new_temp = make_ssa_name (vec_dest, new_call);
4251 else
4252 new_temp = make_ssa_name (rtype, new_call);
4253 gimple_call_set_lhs (new_call, new_temp);
4255 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4256 gimple *new_stmt = new_call;
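      /* Bring the clone's return value into the statement's vector type:
         if the statement's vectype has fewer elements than the clone's
         simdlen, split the result into several vectors (BIT_FIELD_REF, or
         MEM_REF for an array return); if it has more, accumulate the results
         of several calls into a CONSTRUCTOR; otherwise load from the returned
         array or VIEW_CONVERT when the types differ.  */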
4258 if (vec_dest)
4260 if (simd_clone_subparts (vectype) < nunits)
4262 unsigned int k, l;
4263 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4264 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4265 k = nunits / simd_clone_subparts (vectype);
4266 gcc_assert ((k & (k - 1)) == 0);
4267 for (l = 0; l < k; l++)
4269 tree t;
4270 if (ratype)
4272 t = build_fold_addr_expr (new_temp);
4273 t = build2 (MEM_REF, vectype, t,
4274 build_int_cst (TREE_TYPE (t), l * bytes));
4276 else
4277 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4278 bitsize_int (prec), bitsize_int (l * prec));
4279 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4280 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4282 if (j == 0 && l == 0)
4283 *vec_stmt = new_stmt;
4284 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4287 if (ratype)
4288 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4289 continue;
4291 else if (simd_clone_subparts (vectype) > nunits)
4293 unsigned int k = (simd_clone_subparts (vectype)
4294 / simd_clone_subparts (rtype));
4295 gcc_assert ((k & (k - 1)) == 0);
4296 if ((j & (k - 1)) == 0)
4297 vec_alloc (ret_ctor_elts, k);
4298 if (ratype)
4300 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4301 for (m = 0; m < o; m++)
4303 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4304 size_int (m), NULL_TREE, NULL_TREE);
4305 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4306 tem);
4307 vect_finish_stmt_generation (vinfo, stmt_info,
4308 new_stmt, gsi);
4309 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4310 gimple_assign_lhs (new_stmt));
4312 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4314 else
4315 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4316 if ((j & (k - 1)) != k - 1)
4317 continue;
4318 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4319 new_stmt
4320 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4321 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4323 if ((unsigned) j == k - 1)
4324 *vec_stmt = new_stmt;
4325 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4326 continue;
4328 else if (ratype)
4330 tree t = build_fold_addr_expr (new_temp);
4331 t = build2 (MEM_REF, vectype, t,
4332 build_int_cst (TREE_TYPE (t), 0));
4333 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4334 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4335 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4337 else if (!useless_type_conversion_p (vectype, rtype))
4339 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4340 new_stmt
4341 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4342 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4346 if (j == 0)
4347 *vec_stmt = new_stmt;
4348 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4351 for (i = 0; i < nargs; ++i)
4353 vec<tree> oprndsi = vec_oprnds[i];
4354 oprndsi.release ();
4356 vargs.release ();
4358 /* The call in STMT might prevent it from being removed in dce.
4359 We however cannot remove it here, due to the way the ssa name
4360 it defines is mapped to the new definition. So just replace the
4361 rhs of the statement with something harmless. */
4363 if (slp_node)
4364 return true;
4366 gimple *new_stmt;
4367 if (scalar_dest)
4369 type = TREE_TYPE (scalar_dest);
4370 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4371 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4373 else
4374 new_stmt = gimple_build_nop ();
4375 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4376 unlink_stmt_vdef (stmt);
4378 return true;
4382 /* Function vect_gen_widened_results_half
4384 Create a vector stmt whose code and result variable are CODE and
4385 VEC_DEST, whose operand count is OP_TYPE, and whose operands are
4386 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4389 STMT_INFO is the original scalar stmt that we are vectorizing. */
4391 static gimple *
4392 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4393 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4394 tree vec_dest, gimple_stmt_iterator *gsi,
4395 stmt_vec_info stmt_info)
4397 gimple *new_stmt;
4398 tree new_temp;
4400 /* Generate half of the widened result: */
4401 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4402 if (op_type != binary_op)
4403 vec_oprnd1 = NULL;
4404 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4405 new_temp = make_ssa_name (vec_dest, new_stmt);
4406 gimple_assign_set_lhs (new_stmt, new_temp);
4407 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4409 return new_stmt;
4413 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4414 For multi-step conversions store the resulting vectors and call the function
4415 recursively. */
4417 static void
4418 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4419 int multi_step_cvt,
4420 stmt_vec_info stmt_info,
4421 vec<tree> vec_dsts,
4422 gimple_stmt_iterator *gsi,
4423 slp_tree slp_node, enum tree_code code)
4425 unsigned int i;
4426 tree vop0, vop1, new_tmp, vec_dest;
4428 vec_dest = vec_dsts.pop ();
4430 for (i = 0; i < vec_oprnds->length (); i += 2)
4432 /* Create demotion operation. */
4433 vop0 = (*vec_oprnds)[i];
4434 vop1 = (*vec_oprnds)[i + 1];
4435 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4436 new_tmp = make_ssa_name (vec_dest, new_stmt);
4437 gimple_assign_set_lhs (new_stmt, new_tmp);
4438 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4440 if (multi_step_cvt)
4441 /* Store the resulting vector for the next recursive call. */
4442 (*vec_oprnds)[i/2] = new_tmp;
4443 else
4445 /* This is the last step of the conversion sequence. Store the
4446 vectors in SLP_NODE or in the vector info of the scalar statement
4447 (or in the STMT_VINFO_RELATED_STMT chain). */
4448 if (slp_node)
4449 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4450 else
4451 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4455 /* For multi-step demotion operations we first generate demotion operations
4456 from the source type to the intermediate types, and then combine the
4457 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4458 type. */
4459 if (multi_step_cvt)
4461 /* At each level of recursion we have half of the operands we had at the
4462 previous level. */
4463 vec_oprnds->truncate ((i+1)/2);
4464 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4465 multi_step_cvt - 1,
4466 stmt_info, vec_dsts, gsi,
4467 slp_node, VEC_PACK_TRUNC_EXPR);
4470 vec_dsts.quick_push (vec_dest);
4474 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4475 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4476 STMT_INFO. For multi-step conversions store the resulting vectors and
4477 call the function recursively. */
4479 static void
4480 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4481 vec<tree> *vec_oprnds0,
4482 vec<tree> *vec_oprnds1,
4483 stmt_vec_info stmt_info, tree vec_dest,
4484 gimple_stmt_iterator *gsi,
4485 enum tree_code code1,
4486 enum tree_code code2, int op_type)
4488 int i;
4489 tree vop0, vop1, new_tmp1, new_tmp2;
4490 gimple *new_stmt1, *new_stmt2;
4491 vec<tree> vec_tmp = vNULL;
4493 vec_tmp.create (vec_oprnds0->length () * 2);
4494 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4496 if (op_type == binary_op)
4497 vop1 = (*vec_oprnds1)[i];
4498 else
4499 vop1 = NULL_TREE;
4501 /* Generate the two halves of the promotion operation. */
4502 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4503 op_type, vec_dest, gsi,
4504 stmt_info);
4505 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4506 op_type, vec_dest, gsi,
4507 stmt_info);
4508 if (is_gimple_call (new_stmt1))
4510 new_tmp1 = gimple_call_lhs (new_stmt1);
4511 new_tmp2 = gimple_call_lhs (new_stmt2);
4513 else
4515 new_tmp1 = gimple_assign_lhs (new_stmt1);
4516 new_tmp2 = gimple_assign_lhs (new_stmt2);
4519 /* Store the results for the next step. */
4520 vec_tmp.quick_push (new_tmp1);
4521 vec_tmp.quick_push (new_tmp2);
4524 vec_oprnds0->release ();
4525 *vec_oprnds0 = vec_tmp;
4529 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4530 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4531 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4532 Return true if STMT_INFO is vectorizable in this way. */
4534 static bool
4535 vectorizable_conversion (vec_info *vinfo,
4536 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4537 gimple **vec_stmt, slp_tree slp_node,
4538 stmt_vector_for_cost *cost_vec)
4540 tree vec_dest;
4541 tree scalar_dest;
4542 tree op0, op1 = NULL_TREE;
4543 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4544 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4545 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4546 tree new_temp;
4547 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4548 int ndts = 2;
4549 poly_uint64 nunits_in;
4550 poly_uint64 nunits_out;
4551 tree vectype_out, vectype_in;
4552 int ncopies, i;
4553 tree lhs_type, rhs_type;
4554 enum { NARROW, NONE, WIDEN } modifier;
4555 vec<tree> vec_oprnds0 = vNULL;
4556 vec<tree> vec_oprnds1 = vNULL;
4557 tree vop0;
4558 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4559 int multi_step_cvt = 0;
4560 vec<tree> interm_types = vNULL;
4561 tree intermediate_type, cvt_type = NULL_TREE;
4562 int op_type;
4563 unsigned short fltsz;
4565 /* Is STMT a vectorizable conversion? */
4567 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4568 return false;
4570 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4571 && ! vec_stmt)
4572 return false;
4574 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4575 if (!stmt)
4576 return false;
4578 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4579 return false;
4581 code = gimple_assign_rhs_code (stmt);
4582 if (!CONVERT_EXPR_CODE_P (code)
4583 && code != FIX_TRUNC_EXPR
4584 && code != FLOAT_EXPR
4585 && code != WIDEN_MULT_EXPR
4586 && code != WIDEN_LSHIFT_EXPR)
4587 return false;
4589 op_type = TREE_CODE_LENGTH (code);
4591 /* Check types of lhs and rhs. */
4592 scalar_dest = gimple_assign_lhs (stmt);
4593 lhs_type = TREE_TYPE (scalar_dest);
4594 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4596 /* Check the operands of the operation. */
4597 slp_tree slp_op0, slp_op1 = NULL;
4598 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4599 0, &op0, &slp_op0, &dt[0], &vectype_in))
4601 if (dump_enabled_p ())
4602 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4603 "use not simple.\n");
4604 return false;
4607 rhs_type = TREE_TYPE (op0);
4608 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4609 && !((INTEGRAL_TYPE_P (lhs_type)
4610 && INTEGRAL_TYPE_P (rhs_type))
4611 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4612 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4613 return false;
4615 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4616 && ((INTEGRAL_TYPE_P (lhs_type)
4617 && !type_has_mode_precision_p (lhs_type))
4618 || (INTEGRAL_TYPE_P (rhs_type)
4619 && !type_has_mode_precision_p (rhs_type))))
4621 if (dump_enabled_p ())
4622 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4623 "type conversion to/from bit-precision unsupported."
4624 "\n");
4625 return false;
4628 if (op_type == binary_op)
4630 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4632 op1 = gimple_assign_rhs2 (stmt);
4633 tree vectype1_in;
4634 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4635 &op1, &slp_op1, &dt[1], &vectype1_in))
4637 if (dump_enabled_p ())
4638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4639 "use not simple.\n");
4640 return false;
4642 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4643 OP1. */
4644 if (!vectype_in)
4645 vectype_in = vectype1_in;
4648 /* If op0 is an external or constant def, infer the vector type
4649 from the scalar type. */
4650 if (!vectype_in)
4651 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4652 if (vec_stmt)
4653 gcc_assert (vectype_in);
4654 if (!vectype_in)
4656 if (dump_enabled_p ())
4657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4658 "no vectype for scalar type %T\n", rhs_type);
4660 return false;
4663 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4664 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4666 if (dump_enabled_p ())
4667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4668 "can't convert between boolean and non "
4669 "boolean vectors %T\n", rhs_type);
4671 return false;
4674 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4675 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4676 if (known_eq (nunits_out, nunits_in))
4677 modifier = NONE;
4678 else if (multiple_p (nunits_out, nunits_in))
4679 modifier = NARROW;
4680 else
4682 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4683 modifier = WIDEN;
4686 /* Multiple types in SLP are handled by creating the appropriate number of
4687 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4688 case of SLP. */
4689 if (slp_node)
4690 ncopies = 1;
4691 else if (modifier == NARROW)
4692 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4693 else
4694 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4696 /* Sanity check: make sure that at least one copy of the vectorized stmt
4697 needs to be generated. */
4698 gcc_assert (ncopies >= 1);
4700 bool found_mode = false;
4701 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4702 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4703 opt_scalar_mode rhs_mode_iter;
4705 /* Supportable by target? */
4706 switch (modifier)
4708 case NONE:
4709 if (code != FIX_TRUNC_EXPR
4710 && code != FLOAT_EXPR
4711 && !CONVERT_EXPR_CODE_P (code))
4712 return false;
4713 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4714 break;
4715 /* FALLTHRU */
4716 unsupported:
4717 if (dump_enabled_p ())
4718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4719 "conversion not supported by target.\n");
4720 return false;
4722 case WIDEN:
4723 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4724 vectype_in, &code1, &code2,
4725 &multi_step_cvt, &interm_types))
4727 /* A binary widening operation can only be supported directly by the
4728 architecture. */
4729 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4730 break;
4733 if (code != FLOAT_EXPR
4734 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4735 goto unsupported;
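      /* Otherwise try a two-stage widening FLOAT conversion, e.g. vectorizing
         a short -> double FLOAT_EXPR as a widening short -> int NOP followed
         by a widening int -> double FLOAT, searching successively wider
         integer modes that the target supports.  */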
4737 fltsz = GET_MODE_SIZE (lhs_mode);
4738 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4740 rhs_mode = rhs_mode_iter.require ();
4741 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4742 break;
4744 cvt_type
4745 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4746 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4747 if (cvt_type == NULL_TREE)
4748 goto unsupported;
4750 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4752 if (!supportable_convert_operation (code, vectype_out,
4753 cvt_type, &codecvt1))
4754 goto unsupported;
4756 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4757 vectype_out, cvt_type,
4758 &codecvt1, &codecvt2,
4759 &multi_step_cvt,
4760 &interm_types))
4761 continue;
4762 else
4763 gcc_assert (multi_step_cvt == 0);
4765 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4766 cvt_type,
4767 vectype_in, &code1, &code2,
4768 &multi_step_cvt, &interm_types))
4770 found_mode = true;
4771 break;
4775 if (!found_mode)
4776 goto unsupported;
4778 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4779 codecvt2 = ERROR_MARK;
4780 else
4782 multi_step_cvt++;
4783 interm_types.safe_push (cvt_type);
4784 cvt_type = NULL_TREE;
4786 break;
4788 case NARROW:
4789 gcc_assert (op_type == unary_op);
4790 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4791 &code1, &multi_step_cvt,
4792 &interm_types))
4793 break;
4795 if (code != FIX_TRUNC_EXPR
4796 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4797 goto unsupported;
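      /* Otherwise try a two-stage narrowing FIX_TRUNC conversion, e.g.
         double -> short as a same-width double -> 64-bit-integer FIX_TRUNC
         followed by a (possibly multi-step) narrowing to short.  */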
4799 cvt_type
4800 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4801 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4802 if (cvt_type == NULL_TREE)
4803 goto unsupported;
4804 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4805 &codecvt1))
4806 goto unsupported;
4807 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4808 &code1, &multi_step_cvt,
4809 &interm_types))
4810 break;
4811 goto unsupported;
4813 default:
4814 gcc_unreachable ();
4817 if (!vec_stmt) /* transformation not required. */
4819 if (slp_node
4820 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4821 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4823 if (dump_enabled_p ())
4824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4825 "incompatible vector types for invariants\n");
4826 return false;
4828 DUMP_VECT_SCOPE ("vectorizable_conversion");
4829 if (modifier == NONE)
4831 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4832 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4833 cost_vec);
4835 else if (modifier == NARROW)
4837 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4838 /* The final packing step produces one vector result per copy. */
4839 unsigned int nvectors
4840 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4841 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4842 multi_step_cvt, cost_vec);
4844 else
4846 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4847 /* The initial unpacking step produces two vector results
4848 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4849 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4850 unsigned int nvectors
4851 = (slp_node
4852 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4853 : ncopies * 2);
4854 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4855 multi_step_cvt, cost_vec);
4857 interm_types.release ();
4858 return true;
4861 /* Transform. */
4862 if (dump_enabled_p ())
4863 dump_printf_loc (MSG_NOTE, vect_location,
4864 "transform conversion. ncopies = %d.\n", ncopies);
4866 if (op_type == binary_op)
4868 if (CONSTANT_CLASS_P (op0))
4869 op0 = fold_convert (TREE_TYPE (op1), op0);
4870 else if (CONSTANT_CLASS_P (op1))
4871 op1 = fold_convert (TREE_TYPE (op0), op1);
4874 /* In case of multi-step conversion, we first generate conversion operations
4875 to the intermediate types, and then from those types to the final one.
4876 We create vector destinations for the intermediate type (TYPES) received
4877 from supportable_*_operation, and store them in the correct order
4878 for future use in vect_create_vectorized_*_stmts (). */
4879 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4880 vec_dest = vect_create_destination_var (scalar_dest,
4881 (cvt_type && modifier == WIDEN)
4882 ? cvt_type : vectype_out);
4883 vec_dsts.quick_push (vec_dest);
4885 if (multi_step_cvt)
4887 for (i = interm_types.length () - 1;
4888 interm_types.iterate (i, &intermediate_type); i--)
4890 vec_dest = vect_create_destination_var (scalar_dest,
4891 intermediate_type);
4892 vec_dsts.quick_push (vec_dest);
4896 if (cvt_type)
4897 vec_dest = vect_create_destination_var (scalar_dest,
4898 modifier == WIDEN
4899 ? vectype_out : cvt_type);
4901 int ninputs = 1;
4902 if (!slp_node)
4904 if (modifier == WIDEN)
4906 else if (modifier == NARROW)
4908 if (multi_step_cvt)
4909 ninputs = vect_pow2 (multi_step_cvt);
4910 ninputs *= 2;
4914 switch (modifier)
4916 case NONE:
4917 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
4918 op0, &vec_oprnds0);
4919 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4921 /* Arguments are ready, create the new vector stmt. */
4922 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4923 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4924 new_temp = make_ssa_name (vec_dest, new_stmt);
4925 gimple_assign_set_lhs (new_stmt, new_temp);
4926 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4928 if (slp_node)
4929 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4930 else
4931 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4933 break;
4935 case WIDEN:
4936 /* In case the vectorization factor (VF) is bigger than the number
4937 of elements that we can fit in a vectype (nunits), we have to
4938 generate more than one vector stmt, i.e. we need to "unroll"
4939 the vector stmt by a factor VF/nunits. */
4940 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4941 op0, &vec_oprnds0,
4942 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
4943 &vec_oprnds1);
4944 if (code == WIDEN_LSHIFT_EXPR)
4946 vec_oprnds1.create (ncopies * ninputs);
4947 for (i = 0; i < ncopies * ninputs; ++i)
4948 vec_oprnds1.quick_push (op1);
4950 /* Arguments are ready. Create the new vector stmts. */
4951 for (i = multi_step_cvt; i >= 0; i--)
4953 tree this_dest = vec_dsts[i];
4954 enum tree_code c1 = code1, c2 = code2;
4955 if (i == 0 && codecvt2 != ERROR_MARK)
4957 c1 = codecvt1;
4958 c2 = codecvt2;
4960 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
4961 &vec_oprnds1, stmt_info,
4962 this_dest, gsi,
4963 c1, c2, op_type);
4966 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4968 gimple *new_stmt;
4969 if (cvt_type)
4971 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4972 new_temp = make_ssa_name (vec_dest);
4973 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
4974 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4976 else
4977 new_stmt = SSA_NAME_DEF_STMT (vop0);
4979 if (slp_node)
4980 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4981 else
4982 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4984 break;
4986 case NARROW:
4987 /* In case the vectorization factor (VF) is bigger than the number
4988 of elements that we can fit in a vectype (nunits), we have to
4989 generate more than one vector stmt, i.e. we need to "unroll"
4990 the vector stmt by a factor VF/nunits. */
4991 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4992 op0, &vec_oprnds0);
4993 /* Arguments are ready. Create the new vector stmts. */
4994 if (cvt_type)
4995 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4997 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4998 new_temp = make_ssa_name (vec_dest);
4999 gassign *new_stmt
5000 = gimple_build_assign (new_temp, codecvt1, vop0);
5001 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5002 vec_oprnds0[i] = new_temp;
5005 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5006 multi_step_cvt,
5007 stmt_info, vec_dsts, gsi,
5008 slp_node, code1);
5009 break;
5011 if (!slp_node)
5012 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5014 vec_oprnds0.release ();
5015 vec_oprnds1.release ();
5016 interm_types.release ();
5018 return true;
5021 /* Return true if we can assume from the scalar form of STMT_INFO that
5022 neither the scalar nor the vector forms will generate code. STMT_INFO
5023 is known not to involve a data reference. */
5025 bool
5026 vect_nop_conversion_p (stmt_vec_info stmt_info)
5028 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5029 if (!stmt)
5030 return false;
5032 tree lhs = gimple_assign_lhs (stmt);
5033 tree_code code = gimple_assign_rhs_code (stmt);
5034 tree rhs = gimple_assign_rhs1 (stmt);
5036 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5037 return true;
5039 if (CONVERT_EXPR_CODE_P (code))
5040 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5042 return false;
5045 /* Function vectorizable_assignment.
5047 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5048 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5049 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5050 Return true if STMT_INFO is vectorizable in this way. */
5052 static bool
5053 vectorizable_assignment (vec_info *vinfo,
5054 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5055 gimple **vec_stmt, slp_tree slp_node,
5056 stmt_vector_for_cost *cost_vec)
5058 tree vec_dest;
5059 tree scalar_dest;
5060 tree op;
5061 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5062 tree new_temp;
5063 enum vect_def_type dt[1] = {vect_unknown_def_type};
5064 int ndts = 1;
5065 int ncopies;
5066 int i;
5067 vec<tree> vec_oprnds = vNULL;
5068 tree vop;
5069 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5070 enum tree_code code;
5071 tree vectype_in;
5073 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5074 return false;
5076 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5077 && ! vec_stmt)
5078 return false;
5080 /* Is this a vectorizable assignment? */
5081 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5082 if (!stmt)
5083 return false;
5085 scalar_dest = gimple_assign_lhs (stmt);
5086 if (TREE_CODE (scalar_dest) != SSA_NAME)
5087 return false;
5089 if (STMT_VINFO_DATA_REF (stmt_info))
5090 return false;
5092 code = gimple_assign_rhs_code (stmt);
5093 if (!(gimple_assign_single_p (stmt)
5094 || code == PAREN_EXPR
5095 || CONVERT_EXPR_CODE_P (code)))
5096 return false;
5098 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5099 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5101 /* Multiple types in SLP are handled by creating the appropriate number of
5102 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5103 case of SLP. */
5104 if (slp_node)
5105 ncopies = 1;
5106 else
5107 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5109 gcc_assert (ncopies >= 1);
5111 slp_tree slp_op;
5112 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5113 &dt[0], &vectype_in))
5115 if (dump_enabled_p ())
5116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5117 "use not simple.\n");
5118 return false;
5120 if (!vectype_in)
5121 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5123 /* We can handle NOP_EXPR conversions that do not change the number
5124 of elements or the vector size. */
5125 if ((CONVERT_EXPR_CODE_P (code)
5126 || code == VIEW_CONVERT_EXPR)
5127 && (!vectype_in
5128 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5129 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5130 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5131 return false;
5133 /* We do not handle bit-precision changes. */
5134 if ((CONVERT_EXPR_CODE_P (code)
5135 || code == VIEW_CONVERT_EXPR)
5136 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5137 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5138 || !type_has_mode_precision_p (TREE_TYPE (op)))
5139 /* But a conversion that does not change the bit-pattern is ok. */
5140 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5141 > TYPE_PRECISION (TREE_TYPE (op)))
5142 && TYPE_UNSIGNED (TREE_TYPE (op)))
5143 /* Conversion between boolean types of different sizes is
5144 a simple assignment in case their vectypes are the same
5145 boolean vectors. */
5146 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5147 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5149 if (dump_enabled_p ())
5150 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5151 "type conversion to/from bit-precision "
5152 "unsupported.\n");
5153 return false;
5156 if (!vec_stmt) /* transformation not required. */
5158 if (slp_node
5159 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5161 if (dump_enabled_p ())
5162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5163 "incompatible vector types for invariants\n");
5164 return false;
5166 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5167 DUMP_VECT_SCOPE ("vectorizable_assignment");
5168 if (!vect_nop_conversion_p (stmt_info))
5169 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5170 cost_vec);
5171 return true;
5174 /* Transform. */
5175 if (dump_enabled_p ())
5176 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5178 /* Handle def. */
5179 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5181 /* Handle use. */
5182 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5184 /* Arguments are ready. Create the new vector stmt. */
5185 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5187 if (CONVERT_EXPR_CODE_P (code)
5188 || code == VIEW_CONVERT_EXPR)
5189 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5190 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5191 new_temp = make_ssa_name (vec_dest, new_stmt);
5192 gimple_assign_set_lhs (new_stmt, new_temp);
5193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5194 if (slp_node)
5195 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5196 else
5197 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5199 if (!slp_node)
5200 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5202 vec_oprnds.release ();
5203 return true;
5207 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5208 either as shift by a scalar or by a vector. */
5210 bool
5211 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5214 machine_mode vec_mode;
5215 optab optab;
5216 int icode;
5217 tree vectype;
5219 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5220 if (!vectype)
5221 return false;
5223 optab = optab_for_tree_code (code, vectype, optab_scalar);
5224 if (!optab
5225 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5227 optab = optab_for_tree_code (code, vectype, optab_vector);
5228 if (!optab
5229 || (optab_handler (optab, TYPE_MODE (vectype))
5230 == CODE_FOR_nothing))
5231 return false;
5234 vec_mode = TYPE_MODE (vectype);
5235 icode = (int) optab_handler (optab, vec_mode);
5236 if (icode == CODE_FOR_nothing)
5237 return false;
5239 return true;
5243 /* Function vectorizable_shift.
5245 Check if STMT_INFO performs a shift operation that can be vectorized.
5246 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5247 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5248 Return true if STMT_INFO is vectorizable in this way. */
5250 static bool
5251 vectorizable_shift (vec_info *vinfo,
5252 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5253 gimple **vec_stmt, slp_tree slp_node,
5254 stmt_vector_for_cost *cost_vec)
5256 tree vec_dest;
5257 tree scalar_dest;
5258 tree op0, op1 = NULL;
5259 tree vec_oprnd1 = NULL_TREE;
5260 tree vectype;
5261 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5262 enum tree_code code;
5263 machine_mode vec_mode;
5264 tree new_temp;
5265 optab optab;
5266 int icode;
5267 machine_mode optab_op2_mode;
5268 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5269 int ndts = 2;
5270 poly_uint64 nunits_in;
5271 poly_uint64 nunits_out;
5272 tree vectype_out;
5273 tree op1_vectype;
5274 int ncopies;
5275 int i;
5276 vec<tree> vec_oprnds0 = vNULL;
5277 vec<tree> vec_oprnds1 = vNULL;
5278 tree vop0, vop1;
5279 unsigned int k;
5280 bool scalar_shift_arg = true;
5281 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5282 bool incompatible_op1_vectype_p = false;
5284 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5285 return false;
5287 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5288 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5289 && ! vec_stmt)
5290 return false;
5292 /* Is STMT a vectorizable binary/unary operation? */
5293 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5294 if (!stmt)
5295 return false;
5297 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5298 return false;
5300 code = gimple_assign_rhs_code (stmt);
5302 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5303 || code == RROTATE_EXPR))
5304 return false;
5306 scalar_dest = gimple_assign_lhs (stmt);
5307 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5308 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5310 if (dump_enabled_p ())
5311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5312 "bit-precision shifts not supported.\n");
5313 return false;
5316 slp_tree slp_op0;
5317 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5318 0, &op0, &slp_op0, &dt[0], &vectype))
5320 if (dump_enabled_p ())
5321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5322 "use not simple.\n");
5323 return false;
5325 /* If op0 is an external or constant def, infer the vector type
5326 from the scalar type. */
5327 if (!vectype)
5328 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5329 if (vec_stmt)
5330 gcc_assert (vectype);
5331 if (!vectype)
5333 if (dump_enabled_p ())
5334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5335 "no vectype for scalar type\n");
5336 return false;
5339 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5340 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5341 if (maybe_ne (nunits_out, nunits_in))
5342 return false;
5344 stmt_vec_info op1_def_stmt_info;
5345 slp_tree slp_op1;
5346 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5347 &dt[1], &op1_vectype, &op1_def_stmt_info))
5349 if (dump_enabled_p ())
5350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5351 "use not simple.\n");
5352 return false;
5355 /* Multiple types in SLP are handled by creating the appropriate number of
5356 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5357 case of SLP. */
5358 if (slp_node)
5359 ncopies = 1;
5360 else
5361 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5363 gcc_assert (ncopies >= 1);
5365 /* Determine whether the shift amount is a vector or a scalar. If the
5366 shift/rotate amount is a vector, use the vector/vector shift optabs. */
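/* Illustrative example (not from the original sources): in a loop
   computing a[i] = b[i] << c[i] the shift amount is a loop-varying
   vector, so the vector/vector optab is needed; for a[i] = b[i] << n
   with n loop-invariant the amount is a scalar, and the vector/scalar
   optab can be used (and is preferred by the code below when the
   target supports it).  */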
5368 if ((dt[1] == vect_internal_def
5369 || dt[1] == vect_induction_def
5370 || dt[1] == vect_nested_cycle)
5371 && !slp_node)
5372 scalar_shift_arg = false;
5373 else if (dt[1] == vect_constant_def
5374 || dt[1] == vect_external_def
5375 || dt[1] == vect_internal_def)
5377 /* In SLP, we need to check whether the shift count is the same
5378 in all the SLP stmts; in loops, a constant or invariant shift
5379 count is always a scalar shift. */
5380 if (slp_node)
5382 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5383 stmt_vec_info slpstmt_info;
5385 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5387 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5388 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5389 scalar_shift_arg = false;
5392 /* For internal SLP defs we have to make sure we see scalar stmts
5393 for all vector elements.
5394 ??? For different vectors we could resort to a different
5395 scalar shift operand but code-generation below simply always
5396 takes the first. */
5397 if (dt[1] == vect_internal_def
5398 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5399 stmts.length ()))
5400 scalar_shift_arg = false;
5403 /* If the shift amount is computed by a pattern stmt we cannot
5404 use the scalar amount directly, so give up and use a vector
5405 shift. */
5406 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5407 scalar_shift_arg = false;
5409 else
5411 if (dump_enabled_p ())
5412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5413 "operand mode requires invariant argument.\n");
5414 return false;
5417 /* Vector shifted by vector. */
5418 bool was_scalar_shift_arg = scalar_shift_arg;
5419 if (!scalar_shift_arg)
5421 optab = optab_for_tree_code (code, vectype, optab_vector);
5422 if (dump_enabled_p ())
5423 dump_printf_loc (MSG_NOTE, vect_location,
5424 "vector/vector shift/rotate found.\n");
5426 if (!op1_vectype)
5427 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5428 slp_op1);
5429 incompatible_op1_vectype_p
5430 = (op1_vectype == NULL_TREE
5431 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5432 TYPE_VECTOR_SUBPARTS (vectype))
5433 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5434 if (incompatible_op1_vectype_p
5435 && (!slp_node
5436 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5437 || slp_op1->refcnt != 1))
5439 if (dump_enabled_p ())
5440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5441 "unusable type for last operand in"
5442 " vector/vector shift/rotate.\n");
5443 return false;
5446 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
5447 whether it has a vector-shifted-by-vector insn. */
5448 else
5450 optab = optab_for_tree_code (code, vectype, optab_scalar);
5451 if (optab
5452 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5454 if (dump_enabled_p ())
5455 dump_printf_loc (MSG_NOTE, vect_location,
5456 "vector/scalar shift/rotate found.\n");
5458 else
5460 optab = optab_for_tree_code (code, vectype, optab_vector);
5461 if (optab
5462 && (optab_handler (optab, TYPE_MODE (vectype))
5463 != CODE_FOR_nothing))
5465 scalar_shift_arg = false;
5467 if (dump_enabled_p ())
5468 dump_printf_loc (MSG_NOTE, vect_location,
5469 "vector/vector shift/rotate found.\n");
5471 if (!op1_vectype)
5472 op1_vectype = get_vectype_for_scalar_type (vinfo,
5473 TREE_TYPE (op1),
5474 slp_op1);
5476 /* Unlike the other binary operators, shifts/rotates have an int
5477 rhs rather than one of the same type as the lhs, so make sure
5478 the scalar has the right type if we are dealing with vectors
5479 of long long/long/short/char. */
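/* For instance (illustrative), for a vector(2) long long shifted by an
   int count, the count type differs in precision from the element type;
   tree_nop_conversion_p detects that, and for constant or invariant
   counts the count is later fold_converted to the element type.  */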
5480 incompatible_op1_vectype_p
5481 = (!op1_vectype
5482 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5483 TREE_TYPE (op1)));
5484 if (incompatible_op1_vectype_p
5485 && dt[1] == vect_internal_def)
5487 if (dump_enabled_p ())
5488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5489 "unusable type for last operand in"
5490 " vector/vector shift/rotate.\n");
5491 return false;
5497 /* Supportable by target? */
5498 if (!optab)
5500 if (dump_enabled_p ())
5501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5502 "no optab.\n");
5503 return false;
5505 vec_mode = TYPE_MODE (vectype);
5506 icode = (int) optab_handler (optab, vec_mode);
5507 if (icode == CODE_FOR_nothing)
5509 if (dump_enabled_p ())
5510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5511 "op not supported by target.\n");
5512 /* Check only during analysis. */
5513 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5514 || (!vec_stmt
5515 && !vect_worthwhile_without_simd_p (vinfo, code)))
5516 return false;
5517 if (dump_enabled_p ())
5518 dump_printf_loc (MSG_NOTE, vect_location,
5519 "proceeding using word mode.\n");
5522 /* Worthwhile without SIMD support? Check only during analysis. */
5523 if (!vec_stmt
5524 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5525 && !vect_worthwhile_without_simd_p (vinfo, code))
5527 if (dump_enabled_p ())
5528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5529 "not worthwhile without SIMD support.\n");
5530 return false;
5533 if (!vec_stmt) /* transformation not required. */
5535 if (slp_node
5536 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5537 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5538 && (!incompatible_op1_vectype_p
5539 || dt[1] == vect_constant_def)
5540 && !vect_maybe_update_slp_op_vectype
5541 (slp_op1,
5542 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5544 if (dump_enabled_p ())
5545 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5546 "incompatible vector types for invariants\n");
5547 return false;
5549 /* Now adjust the constant shift amount in place. */
5550 if (slp_node
5551 && incompatible_op1_vectype_p
5552 && dt[1] == vect_constant_def)
5554 for (unsigned i = 0;
5555 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5557 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5558 = fold_convert (TREE_TYPE (vectype),
5559 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5560 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5561 == INTEGER_CST));
5564 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5565 DUMP_VECT_SCOPE ("vectorizable_shift");
5566 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5567 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5568 return true;
5571 /* Transform. */
5573 if (dump_enabled_p ())
5574 dump_printf_loc (MSG_NOTE, vect_location,
5575 "transform binary/unary operation.\n");
5577 if (incompatible_op1_vectype_p && !slp_node)
5579 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5580 op1 = fold_convert (TREE_TYPE (vectype), op1);
5581 if (dt[1] != vect_constant_def)
5582 op1 = vect_init_vector (vinfo, stmt_info, op1,
5583 TREE_TYPE (vectype), NULL);
5586 /* Handle def. */
5587 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5589 if (scalar_shift_arg && dt[1] != vect_internal_def)
5591 /* Vector shl and shr insn patterns can be defined with scalar
5592 operand 2 (shift operand). In this case, use constant or loop
5593 invariant op1 directly, without extending it to vector mode
5594 first. */
5595 optab_op2_mode = insn_data[icode].operand[2].mode;
5596 if (!VECTOR_MODE_P (optab_op2_mode))
5598 if (dump_enabled_p ())
5599 dump_printf_loc (MSG_NOTE, vect_location,
5600 "operand 1 using scalar mode.\n");
5601 vec_oprnd1 = op1;
5602 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5603 vec_oprnds1.quick_push (vec_oprnd1);
5604 /* Store vec_oprnd1 for every vector stmt to be created.
5605 We check during the analysis that all the shift arguments
5606 are the same.
5607 TODO: Allow different constants for different vector
5608 stmts generated for an SLP instance. */
5609 for (k = 0;
5610 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5611 vec_oprnds1.quick_push (vec_oprnd1);
5614 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5616 if (was_scalar_shift_arg)
5618 /* If the argument was the same in all lanes, create
5619 the correctly typed vector shift amount directly. */
5620 op1 = fold_convert (TREE_TYPE (vectype), op1);
5621 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5622 !loop_vinfo ? gsi : NULL);
5623 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5624 !loop_vinfo ? gsi : NULL);
5625 vec_oprnds1.create (slp_node->vec_stmts_size);
5626 for (k = 0; k < slp_node->vec_stmts_size; k++)
5627 vec_oprnds1.quick_push (vec_oprnd1);
5629 else if (dt[1] == vect_constant_def)
5630 /* The constant shift amount has been adjusted in place. */
5632 else
5633 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5636 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5637 (a special case for certain kinds of vector shifts); otherwise,
5638 operand 1 should be of a vector type (the usual case). */
5639 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5640 op0, &vec_oprnds0,
5641 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5643 /* Arguments are ready. Create the new vector stmt. */
5644 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5646 /* For internal defs where we need to use a scalar shift arg,
5647 extract the first lane. */
5648 if (scalar_shift_arg && dt[1] == vect_internal_def)
5650 vop1 = vec_oprnds1[0];
5651 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5652 gassign *new_stmt
5653 = gimple_build_assign (new_temp,
5654 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5655 vop1,
5656 TYPE_SIZE (TREE_TYPE (new_temp)),
5657 bitsize_zero_node));
5658 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5659 vop1 = new_temp;
5661 else
5662 vop1 = vec_oprnds1[i];
5663 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5664 new_temp = make_ssa_name (vec_dest, new_stmt);
5665 gimple_assign_set_lhs (new_stmt, new_temp);
5666 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5667 if (slp_node)
5668 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5669 else
5670 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5673 if (!slp_node)
5674 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5676 vec_oprnds0.release ();
5677 vec_oprnds1.release ();
5679 return true;
5683 /* Function vectorizable_operation.
5685 Check if STMT_INFO performs a binary, unary or ternary operation that can
5686 be vectorized.
5687 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5688 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5689 Return true if STMT_INFO is vectorizable in this way. */
5691 static bool
5692 vectorizable_operation (vec_info *vinfo,
5693 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5694 gimple **vec_stmt, slp_tree slp_node,
5695 stmt_vector_for_cost *cost_vec)
5697 tree vec_dest;
5698 tree scalar_dest;
5699 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5700 tree vectype;
5701 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5702 enum tree_code code, orig_code;
5703 machine_mode vec_mode;
5704 tree new_temp;
5705 int op_type;
5706 optab optab;
5707 bool target_support_p;
5708 enum vect_def_type dt[3]
5709 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5710 int ndts = 3;
5711 poly_uint64 nunits_in;
5712 poly_uint64 nunits_out;
5713 tree vectype_out;
5714 int ncopies, vec_num;
5715 int i;
5716 vec<tree> vec_oprnds0 = vNULL;
5717 vec<tree> vec_oprnds1 = vNULL;
5718 vec<tree> vec_oprnds2 = vNULL;
5719 tree vop0, vop1, vop2;
5720 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5722 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5723 return false;
5725 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5726 && ! vec_stmt)
5727 return false;
5729 /* Is STMT a vectorizable binary/unary operation? */
5730 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5731 if (!stmt)
5732 return false;
5734 /* Loads and stores are handled in vectorizable_{load,store}. */
5735 if (STMT_VINFO_DATA_REF (stmt_info))
5736 return false;
5738 orig_code = code = gimple_assign_rhs_code (stmt);
5740 /* Shifts are handled in vectorizable_shift. */
5741 if (code == LSHIFT_EXPR
5742 || code == RSHIFT_EXPR
5743 || code == LROTATE_EXPR
5744 || code == RROTATE_EXPR)
5745 return false;
5747 /* Comparisons are handled in vectorizable_comparison. */
5748 if (TREE_CODE_CLASS (code) == tcc_comparison)
5749 return false;
5751 /* Conditions are handled in vectorizable_condition. */
5752 if (code == COND_EXPR)
5753 return false;
5755 /* For pointer addition and subtraction, we should use the normal
5756 plus and minus for the vector operation. */
5757 if (code == POINTER_PLUS_EXPR)
5758 code = PLUS_EXPR;
5759 if (code == POINTER_DIFF_EXPR)
5760 code = MINUS_EXPR;
5762 /* Support only unary, binary or ternary operations. */
5763 op_type = TREE_CODE_LENGTH (code);
5764 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5766 if (dump_enabled_p ())
5767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5768 "num. args = %d (not unary/binary/ternary op).\n",
5769 op_type);
5770 return false;
5773 scalar_dest = gimple_assign_lhs (stmt);
5774 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5776 /* Most operations cannot handle bit-precision types without extra
5777 truncations. */
5778 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5779 if (!mask_op_p
5780 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5781 /* The exceptions are bitwise binary operations. */
5782 && code != BIT_IOR_EXPR
5783 && code != BIT_XOR_EXPR
5784 && code != BIT_AND_EXPR)
5786 if (dump_enabled_p ())
5787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5788 "bit-precision arithmetic not supported.\n");
5789 return false;
5792 slp_tree slp_op0;
5793 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5794 0, &op0, &slp_op0, &dt[0], &vectype))
5796 if (dump_enabled_p ())
5797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5798 "use not simple.\n");
5799 return false;
5801 /* If op0 is an external or constant def, infer the vector type
5802 from the scalar type. */
5803 if (!vectype)
5805 /* For a boolean type we cannot determine the vectype from
5806 an invariant value (we don't know whether it is a vector
5807 of booleans or a vector of integers). Use the output
5808 vectype because operations on booleans don't change the
5809 type. */
5810 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5812 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5814 if (dump_enabled_p ())
5815 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5816 "not supported operation on bool value.\n");
5817 return false;
5819 vectype = vectype_out;
5821 else
5822 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5823 slp_node);
5825 if (vec_stmt)
5826 gcc_assert (vectype);
5827 if (!vectype)
5829 if (dump_enabled_p ())
5830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5831 "no vectype for scalar type %T\n",
5832 TREE_TYPE (op0));
5834 return false;
5837 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5838 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5839 if (maybe_ne (nunits_out, nunits_in))
5840 return false;
5842 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5843 slp_tree slp_op1 = NULL, slp_op2 = NULL;
5844 if (op_type == binary_op || op_type == ternary_op)
5846 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5847 1, &op1, &slp_op1, &dt[1], &vectype2))
5849 if (dump_enabled_p ())
5850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5851 "use not simple.\n");
5852 return false;
5855 if (op_type == ternary_op)
5857 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5858 2, &op2, &slp_op2, &dt[2], &vectype3))
5860 if (dump_enabled_p ())
5861 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5862 "use not simple.\n");
5863 return false;
5867 /* Multiple types in SLP are handled by creating the appropriate number of
5868 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5869 case of SLP. */
5870 if (slp_node)
5872 ncopies = 1;
5873 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5875 else
5877 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5878 vec_num = 1;
5881 gcc_assert (ncopies >= 1);
5883 /* Reject attempts to combine mask types with nonmask types, e.g. if
5884 we have an AND between a (nonmask) boolean loaded from memory and
5885 a (mask) boolean result of a comparison.
5887 TODO: We could easily fix these cases up using pattern statements. */
5888 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
5889 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
5890 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
5892 if (dump_enabled_p ())
5893 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5894 "mixed mask and nonmask vector types\n");
5895 return false;
5898 /* Supportable by target? */
5900 vec_mode = TYPE_MODE (vectype);
5901 if (code == MULT_HIGHPART_EXPR)
5902 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5903 else
5905 optab = optab_for_tree_code (code, vectype, optab_default);
5906 if (!optab)
5908 if (dump_enabled_p ())
5909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5910 "no optab.\n");
5911 return false;
5913 target_support_p = (optab_handler (optab, vec_mode)
5914 != CODE_FOR_nothing);
5917 if (!target_support_p)
5919 if (dump_enabled_p ())
5920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5921 "op not supported by target.\n");
5922 /* Check only during analysis. */
5923 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5924 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5925 return false;
5926 if (dump_enabled_p ())
5927 dump_printf_loc (MSG_NOTE, vect_location,
5928 "proceeding using word mode.\n");
5931 /* Worthwhile without SIMD support? Check only during analysis. */
5932 if (!VECTOR_MODE_P (vec_mode)
5933 && !vec_stmt
5934 && !vect_worthwhile_without_simd_p (vinfo, code))
5936 if (dump_enabled_p ())
5937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5938 "not worthwhile without SIMD support.\n");
5939 return false;
5942 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
5943 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
5944 internal_fn cond_fn = get_conditional_internal_fn (code);
5946 if (!vec_stmt) /* transformation not required. */
5948 /* If this operation is part of a reduction, a fully-masked loop
5949 should only change the active lanes of the reduction chain,
5950 keeping the inactive lanes as-is. */
5951 if (loop_vinfo
5952 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5953 && reduc_idx >= 0)
5955 if (cond_fn == IFN_LAST
5956 || !direct_internal_fn_supported_p (cond_fn, vectype,
5957 OPTIMIZE_FOR_SPEED))
5959 if (dump_enabled_p ())
5960 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5961 "can't use a fully-masked loop because no"
5962 " conditional operation is available.\n");
5963 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
5965 else
5966 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
5967 vectype, NULL);
5970 /* Put types on constant and invariant SLP children. */
5971 if (slp_node
5972 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5973 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
5974 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
5976 if (dump_enabled_p ())
5977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5978 "incompatible vector types for invariants\n");
5979 return false;
5982 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5983 DUMP_VECT_SCOPE ("vectorizable_operation");
5984 vect_model_simple_cost (vinfo, stmt_info,
5985 ncopies, dt, ndts, slp_node, cost_vec);
5986 return true;
5989 /* Transform. */
5991 if (dump_enabled_p ())
5992 dump_printf_loc (MSG_NOTE, vect_location,
5993 "transform binary/unary operation.\n");
5995 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
5997 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5998 vectors with unsigned elements, but the result is signed. So we
5999 need to compute the MINUS_EXPR into a vectype temporary and
6000 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6001 tree vec_cvt_dest = NULL_TREE;
6002 if (orig_code == POINTER_DIFF_EXPR)
6004 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6005 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6007 /* Handle def. */
6008 else
6009 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6011 /* In case the vectorization factor (VF) is bigger than the number
6012 of elements that we can fit in a vectype (nunits), we have to generate
6013 more than one vector stmt - i.e. we need to "unroll" the
6014 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6015 from one copy of the vector stmt to the next, in the field
6016 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6017 stages to find the correct vector defs to be used when vectorizing
6018 stmts that use the defs of the current stmt. The example below
6019 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6020 we need to create 4 vectorized stmts):
6022 before vectorization:
6023 RELATED_STMT VEC_STMT
6024 S1: x = memref - -
6025 S2: z = x + 1 - -
6027 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6028 there):
6029 RELATED_STMT VEC_STMT
6030 VS1_0: vx0 = memref0 VS1_1 -
6031 VS1_1: vx1 = memref1 VS1_2 -
6032 VS1_2: vx2 = memref2 VS1_3 -
6033 VS1_3: vx3 = memref3 - -
6034 S1: x = load - VS1_0
6035 S2: z = x + 1 - -
6037 step2: vectorize stmt S2 (done here):
6038 To vectorize stmt S2 we first need to find the relevant vector
6039 def for the first operand 'x'. This is, as usual, obtained from
6040 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6041 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6042 relevant vector def 'vx0'. Having found 'vx0' we can generate
6043 the vector stmt VS2_0, and as usual, record it in the
6044 STMT_VINFO_VEC_STMT of stmt S2.
6045 When creating the second copy (VS2_1), we obtain the relevant vector
6046 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6047 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6048 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6049 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6050 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6051 chain of stmts and pointers:
6052 RELATED_STMT VEC_STMT
6053 VS1_0: vx0 = memref0 VS1_1 -
6054 VS1_1: vx1 = memref1 VS1_2 -
6055 VS1_2: vx2 = memref2 VS1_3 -
6056 VS1_3: vx3 = memref3 - -
6057 S1: x = load - VS1_0
6058 VS2_0: vz0 = vx0 + v1 VS2_1 -
6059 VS2_1: vz1 = vx1 + v1 VS2_2 -
6060 VS2_2: vz2 = vx2 + v1 VS2_3 -
6061 VS2_3: vz3 = vx3 + v1 - -
6062 S2: z = x + 1 - VS2_0 */
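/* As a concrete (illustrative) source-level instance of the above,
   with VF=16 and 4-element vectors a scalar loop such as

     for (i = 0; i < n; i++)
       z[i] = x[i] + 1;

   has S1 as the load of x[i] and S2 as the addition; four vector
   loads VS1_0..VS1_3 and four vector adds VS2_0..VS2_3 are emitted
   per vectorized iteration, chained through RELATED_STMT as shown.  */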
6064 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6065 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6066 /* Arguments are ready. Create the new vector stmt. */
6067 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6069 gimple *new_stmt = NULL;
6070 vop1 = ((op_type == binary_op || op_type == ternary_op)
6071 ? vec_oprnds1[i] : NULL_TREE);
6072 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6073 if (masked_loop_p && reduc_idx >= 0)
6075 /* Perform the operation on active elements only and take
6076 inactive elements from the reduction chain input. */
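/* Illustrative note: for a PLUS_EXPR reduction, cond_fn is IFN_COND_ADD,
   so the call built below is .COND_ADD (mask, vop0, vop1, else_value),
   where else_value is the reduction chain input; inactive lanes are
   passed through unchanged.  */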
6077 gcc_assert (!vop2);
6078 vop2 = reduc_idx == 1 ? vop1 : vop0;
6079 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6080 vectype, i);
6081 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6082 vop0, vop1, vop2);
6083 new_temp = make_ssa_name (vec_dest, call);
6084 gimple_call_set_lhs (call, new_temp);
6085 gimple_call_set_nothrow (call, true);
6086 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6087 new_stmt = call;
6089 else
6091 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6092 new_temp = make_ssa_name (vec_dest, new_stmt);
6093 gimple_assign_set_lhs (new_stmt, new_temp);
6094 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6095 if (vec_cvt_dest)
6097 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6098 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6099 new_temp);
6100 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6101 gimple_assign_set_lhs (new_stmt, new_temp);
6102 vect_finish_stmt_generation (vinfo, stmt_info,
6103 new_stmt, gsi);
6106 if (slp_node)
6107 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6108 else
6109 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6112 if (!slp_node)
6113 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6115 vec_oprnds0.release ();
6116 vec_oprnds1.release ();
6117 vec_oprnds2.release ();
6119 return true;
6122 /* A helper function to ensure data reference DR_INFO's base alignment. */
6124 static void
6125 ensure_base_align (dr_vec_info *dr_info)
6127 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6128 return;
6130 if (dr_info->base_misaligned)
6132 tree base_decl = dr_info->base_decl;
6134 // We should only be able to increase the alignment of a base object if
6135 // we know what its new alignment should be at compile time.
6136 unsigned HOST_WIDE_INT align_base_to =
6137 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6139 if (decl_in_symtab_p (base_decl))
6140 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6141 else if (DECL_ALIGN (base_decl) < align_base_to)
6143 SET_DECL_ALIGN (base_decl, align_base_to);
6144 DECL_USER_ALIGN (base_decl) = 1;
6146 dr_info->base_misaligned = false;
6151 /* Function get_group_alias_ptr_type.
6153 Return the alias type for the group starting at FIRST_STMT_INFO. */
6155 static tree
6156 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6158 struct data_reference *first_dr, *next_dr;
6160 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6161 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6162 while (next_stmt_info)
6164 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6165 if (get_alias_set (DR_REF (first_dr))
6166 != get_alias_set (DR_REF (next_dr)))
6168 if (dump_enabled_p ())
6169 dump_printf_loc (MSG_NOTE, vect_location,
6170 "conflicting alias set types.\n");
6171 return ptr_type_node;
6173 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6175 return reference_alias_ptr_type (DR_REF (first_dr));
6179 /* Function scan_operand_equal_p.
6181 Helper function for check_scan_store. Compare two references
6182 with .GOMP_SIMD_LANE bases. */
6184 static bool
6185 scan_operand_equal_p (tree ref1, tree ref2)
6187 tree ref[2] = { ref1, ref2 };
6188 poly_int64 bitsize[2], bitpos[2];
6189 tree offset[2], base[2];
6190 for (int i = 0; i < 2; ++i)
6192 machine_mode mode;
6193 int unsignedp, reversep, volatilep = 0;
6194 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6195 &offset[i], &mode, &unsignedp,
6196 &reversep, &volatilep);
6197 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6198 return false;
6199 if (TREE_CODE (base[i]) == MEM_REF
6200 && offset[i] == NULL_TREE
6201 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6203 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6204 if (is_gimple_assign (def_stmt)
6205 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6206 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6207 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6209 if (maybe_ne (mem_ref_offset (base[i]), 0))
6210 return false;
6211 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6212 offset[i] = gimple_assign_rhs2 (def_stmt);
6217 if (!operand_equal_p (base[0], base[1], 0))
6218 return false;
6219 if (maybe_ne (bitsize[0], bitsize[1]))
6220 return false;
6221 if (offset[0] != offset[1])
6223 if (!offset[0] || !offset[1])
6224 return false;
6225 if (!operand_equal_p (offset[0], offset[1], 0))
6227 tree step[2];
6228 for (int i = 0; i < 2; ++i)
6230 step[i] = integer_one_node;
6231 if (TREE_CODE (offset[i]) == SSA_NAME)
6233 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6234 if (is_gimple_assign (def_stmt)
6235 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6236 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6237 == INTEGER_CST))
6239 step[i] = gimple_assign_rhs2 (def_stmt);
6240 offset[i] = gimple_assign_rhs1 (def_stmt);
6243 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6245 step[i] = TREE_OPERAND (offset[i], 1);
6246 offset[i] = TREE_OPERAND (offset[i], 0);
6248 tree rhs1 = NULL_TREE;
6249 if (TREE_CODE (offset[i]) == SSA_NAME)
6251 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6252 if (gimple_assign_cast_p (def_stmt))
6253 rhs1 = gimple_assign_rhs1 (def_stmt);
6255 else if (CONVERT_EXPR_P (offset[i]))
6256 rhs1 = TREE_OPERAND (offset[i], 0);
6257 if (rhs1
6258 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6259 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6260 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6261 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6262 offset[i] = rhs1;
6264 if (!operand_equal_p (offset[0], offset[1], 0)
6265 || !operand_equal_p (step[0], step[1], 0))
6266 return false;
6269 return true;
6273 enum scan_store_kind {
6274 /* Normal permutation. */
6275 scan_store_kind_perm,
6277 /* Whole vector left shift permutation with zero init. */
6278 scan_store_kind_lshift_zero,
6280 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6281 scan_store_kind_lshift_cond
6284 /* Function scan_store_can_perm_p.
6286 Verify if we can perform the needed permutations or whole vector shifts.
6287 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6288 USE_WHOLE_VECTOR, if nonnull, is filled with the enum scan_store_kind
6289 operation to perform at each step. */
6291 static int
6292 scan_store_can_perm_p (tree vectype, tree init,
6293 vec<enum scan_store_kind> *use_whole_vector = NULL)
6295 enum machine_mode vec_mode = TYPE_MODE (vectype);
6296 unsigned HOST_WIDE_INT nunits;
6297 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6298 return -1;
6299 int units_log2 = exact_log2 (nunits);
6300 if (units_log2 <= 0)
6301 return -1;
6303 int i;
6304 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6305 for (i = 0; i <= units_log2; ++i)
6307 unsigned HOST_WIDE_INT j, k;
6308 enum scan_store_kind kind = scan_store_kind_perm;
6309 vec_perm_builder sel (nunits, nunits, 1);
6310 sel.quick_grow (nunits);
6311 if (i == units_log2)
6313 for (j = 0; j < nunits; ++j)
6314 sel[j] = nunits - 1;
6316 else
6318 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6319 sel[j] = j;
6320 for (k = 0; j < nunits; ++j, ++k)
6321 sel[j] = nunits + k;
6323 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6324 if (!can_vec_perm_const_p (vec_mode, indices))
6326 if (i == units_log2)
6327 return -1;
6329 if (whole_vector_shift_kind == scan_store_kind_perm)
6331 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6332 return -1;
6333 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6334 /* Whole vector shifts shift in zeros, so if init is an all-zeros
6335 constant, there is no need to do anything further. */
6336 if ((TREE_CODE (init) != INTEGER_CST
6337 && TREE_CODE (init) != REAL_CST)
6338 || !initializer_zerop (init))
6340 tree masktype = truth_type_for (vectype);
6341 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6342 return -1;
6343 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6346 kind = whole_vector_shift_kind;
6348 if (use_whole_vector)
6350 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6351 use_whole_vector->safe_grow_cleared (i);
6352 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6353 use_whole_vector->safe_push (kind);
6357 return units_log2;
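/* Illustrative example (assuming an 8-lane vector, so nunits == 8 and
   units_log2 == 3): the permutation selectors built in the loop above are

     i == 0: { 0, 8, 9, 10, 11, 12, 13, 14 }
     i == 1: { 0, 1, 8, 9, 10, 11, 12, 13 }
     i == 2: { 0, 1, 2, 3, 8, 9, 10, 11 }
     i == 3: { 7, 7, 7, 7, 7, 7, 7, 7 }    (broadcast of the last lane)

   i.e. each step i shifts the running partial sums up by 2**i lanes,
   taking the vacated low lanes from the first (initializer) operand,
   and the final step broadcasts the last element.  */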
6361 /* Function check_scan_store.
6363 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6365 static bool
6366 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6367 enum vect_def_type rhs_dt, bool slp, tree mask,
6368 vect_memory_access_type memory_access_type)
6370 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6371 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6372 tree ref_type;
6374 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6375 if (slp
6376 || mask
6377 || memory_access_type != VMAT_CONTIGUOUS
6378 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6379 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6380 || loop_vinfo == NULL
6381 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6382 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6383 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6384 || !integer_zerop (DR_INIT (dr_info->dr))
6385 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6386 || !alias_sets_conflict_p (get_alias_set (vectype),
6387 get_alias_set (TREE_TYPE (ref_type))))
6389 if (dump_enabled_p ())
6390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6391 "unsupported OpenMP scan store.\n");
6392 return false;
6395 /* We need to pattern match code built by OpenMP lowering and simplified
6396 by subsequent optimizations into something we can handle.
6397 #pragma omp simd reduction(inscan,+:r)
6398 for (...)
6400 r += something ();
6401 #pragma omp scan inclusive (r)
6402 use (r);
6404 shall have body with:
6405 // Initialization for input phase, store the reduction initializer:
6406 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6407 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6408 D.2042[_21] = 0;
6409 // Actual input phase:
6411 r.0_5 = D.2042[_20];
6412 _6 = _4 + r.0_5;
6413 D.2042[_20] = _6;
6414 // Initialization for scan phase:
6415 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6416 _26 = D.2043[_25];
6417 _27 = D.2042[_25];
6418 _28 = _26 + _27;
6419 D.2043[_25] = _28;
6420 D.2042[_25] = _28;
6421 // Actual scan phase:
6423 r.1_8 = D.2042[_20];
6425 The "omp simd array" variable D.2042 holds the privatized copy used
6426 inside of the loop and D.2043 is another one that holds copies of
6427 the current original list item. The separate GOMP_SIMD_LANE ifn
6428 kinds are there in order to allow optimizing the initializer store
6429 and combiner sequence, e.g. if it is originally some C++ish user
6430 defined reduction, but allow the vectorizer to pattern recognize it
6431 and turn into the appropriate vectorized scan.
6433 For exclusive scan, this is slightly different:
6434 #pragma omp simd reduction(inscan,+:r)
6435 for (...)
6437 use (r);
6438 #pragma omp scan exclusive (r)
6439 r += something ();
6441 shall have body with:
6442 // Initialization for input phase, store the reduction initializer:
6443 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6444 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6445 D.2042[_21] = 0;
6446 // Actual input phase:
6448 r.0_5 = D.2042[_20];
6449 _6 = _4 + r.0_5;
6450 D.2042[_20] = _6;
6451 // Initialization for scan phase:
6452 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6453 _26 = D.2043[_25];
6454 D.2044[_25] = _26;
6455 _27 = D.2042[_25];
6456 _28 = _26 + _27;
6457 D.2043[_25] = _28;
6458 // Actual scan phase:
6460 r.1_8 = D.2044[_20];
6461 ... */
6463 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6465 /* Match the D.2042[_21] = 0; store above. Just require that
6466 it is a constant or external definition store. */
6467 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6469 fail_init:
6470 if (dump_enabled_p ())
6471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6472 "unsupported OpenMP scan initializer store.\n");
6473 return false;
6476 if (! loop_vinfo->scan_map)
6477 loop_vinfo->scan_map = new hash_map<tree, tree>;
6478 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6479 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6480 if (cached)
6481 goto fail_init;
6482 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6484 /* These stores can be vectorized normally. */
6485 return true;
6488 if (rhs_dt != vect_internal_def)
6490 fail:
6491 if (dump_enabled_p ())
6492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6493 "unsupported OpenMP scan combiner pattern.\n");
6494 return false;
6497 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6498 tree rhs = gimple_assign_rhs1 (stmt);
6499 if (TREE_CODE (rhs) != SSA_NAME)
6500 goto fail;
6502 gimple *other_store_stmt = NULL;
6503 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6504 bool inscan_var_store
6505 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6507 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6509 if (!inscan_var_store)
6511 use_operand_p use_p;
6512 imm_use_iterator iter;
6513 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6515 gimple *use_stmt = USE_STMT (use_p);
6516 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6517 continue;
6518 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6519 || !is_gimple_assign (use_stmt)
6520 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6521 || other_store_stmt
6522 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6523 goto fail;
6524 other_store_stmt = use_stmt;
6526 if (other_store_stmt == NULL)
6527 goto fail;
6528 rhs = gimple_assign_lhs (other_store_stmt);
6529 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6530 goto fail;
6533 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6535 use_operand_p use_p;
6536 imm_use_iterator iter;
6537 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6539 gimple *use_stmt = USE_STMT (use_p);
6540 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6541 continue;
6542 if (other_store_stmt)
6543 goto fail;
6544 other_store_stmt = use_stmt;
6547 else
6548 goto fail;
6550 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6551 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6552 || !is_gimple_assign (def_stmt)
6553 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6554 goto fail;
6556 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6557 /* For pointer addition, we should use the normal plus for the vector
6558 operation. */
6559 switch (code)
6561 case POINTER_PLUS_EXPR:
6562 code = PLUS_EXPR;
6563 break;
6564 case MULT_HIGHPART_EXPR:
6565 goto fail;
6566 default:
6567 break;
6569 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6570 goto fail;
6572 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6573 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6574 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6575 goto fail;
6577 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6578 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6579 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6580 || !gimple_assign_load_p (load1_stmt)
6581 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6582 || !gimple_assign_load_p (load2_stmt))
6583 goto fail;
6585 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6586 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6587 if (load1_stmt_info == NULL
6588 || load2_stmt_info == NULL
6589 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6590 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6591 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6592 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6593 goto fail;
6595 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6597 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6598 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6599 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6600 goto fail;
6601 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6602 tree lrhs;
6603 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6604 lrhs = rhs1;
6605 else
6606 lrhs = rhs2;
6607 use_operand_p use_p;
6608 imm_use_iterator iter;
6609 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6611 gimple *use_stmt = USE_STMT (use_p);
6612 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6613 continue;
6614 if (other_store_stmt)
6615 goto fail;
6616 other_store_stmt = use_stmt;
6620 if (other_store_stmt == NULL)
6621 goto fail;
6622 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6623 || !gimple_store_p (other_store_stmt))
6624 goto fail;
6626 stmt_vec_info other_store_stmt_info
6627 = loop_vinfo->lookup_stmt (other_store_stmt);
6628 if (other_store_stmt_info == NULL
6629 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6630 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6631 goto fail;
6633 gimple *stmt1 = stmt;
6634 gimple *stmt2 = other_store_stmt;
6635 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6636 std::swap (stmt1, stmt2);
6637 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6638 gimple_assign_rhs1 (load2_stmt)))
6640 std::swap (rhs1, rhs2);
6641 std::swap (load1_stmt, load2_stmt);
6642 std::swap (load1_stmt_info, load2_stmt_info);
6644 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6645 gimple_assign_rhs1 (load1_stmt)))
6646 goto fail;
6648 tree var3 = NULL_TREE;
6649 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6650 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6651 gimple_assign_rhs1 (load2_stmt)))
6652 goto fail;
6653 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6655 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6656 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6657 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6658 goto fail;
6659 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6660 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6661 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6662 || lookup_attribute ("omp simd inscan exclusive",
6663 DECL_ATTRIBUTES (var3)))
6664 goto fail;
6667 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6668 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6669 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6670 goto fail;
6672 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6673 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6674 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6675 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6676 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6677 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6678 goto fail;
6680 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6681 std::swap (var1, var2);
6683 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6685 if (!lookup_attribute ("omp simd inscan exclusive",
6686 DECL_ATTRIBUTES (var1)))
6687 goto fail;
6688 var1 = var3;
6691 if (loop_vinfo->scan_map == NULL)
6692 goto fail;
6693 tree *init = loop_vinfo->scan_map->get (var1);
6694 if (init == NULL)
6695 goto fail;
6697 /* The IL is as expected; now check whether we can actually vectorize it.
6698 Inclusive scan:
6699 _26 = D.2043[_25];
6700 _27 = D.2042[_25];
6701 _28 = _26 + _27;
6702 D.2043[_25] = _28;
6703 D.2042[_25] = _28;
6704 should be vectorized as (where _40 is the vectorized rhs
6705 from the D.2042[_21] = 0; store):
6706 _30 = MEM <vector(8) int> [(int *)&D.2043];
6707 _31 = MEM <vector(8) int> [(int *)&D.2042];
6708 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6709 _33 = _31 + _32;
6710 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6711 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6712 _35 = _33 + _34;
6713 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6714 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6715 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6716 _37 = _35 + _36;
6717 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6718 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6719 _38 = _30 + _37;
6720 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6721 MEM <vector(8) int> [(int *)&D.2043] = _39;
6722 MEM <vector(8) int> [(int *)&D.2042] = _38;
6723 Exclusive scan:
6724 _26 = D.2043[_25];
6725 D.2044[_25] = _26;
6726 _27 = D.2042[_25];
6727 _28 = _26 + _27;
6728 D.2043[_25] = _28;
6729 should be vectorized as (where _40 is the vectorized rhs
6730 from the D.2042[_21] = 0; store):
6731 _30 = MEM <vector(8) int> [(int *)&D.2043];
6732 _31 = MEM <vector(8) int> [(int *)&D.2042];
6733 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6734 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6735 _34 = _32 + _33;
6736 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6737 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6738 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6739 _36 = _34 + _35;
6740 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6741 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6742 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6743 _38 = _36 + _37;
6744 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6745 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6746 _39 = _30 + _38;
6747 _50 = _31 + _39;
6748 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6749 MEM <vector(8) int> [(int *)&D.2044] = _39;
6750 MEM <vector(8) int> [(int *)&D.2042] = _51; */
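/* A numeric illustration (not from the original sources) of the inclusive
   scan above, assuming the initializer vector _40 is all zeros and
   _31 = { 1, 2, 3, 4, 5, 6, 7, 8 }:

     _32 = { 0, 1, 2, 3, 4, 5, 6, 7 }     _33 = { 1, 3, 5, 7, 9, 11, 13, 15 }
     _34 = { 0, 0, 1, 3, 5, 7, 9, 11 }    _35 = { 1, 3, 6, 10, 14, 18, 22, 26 }
     _36 = { 0, 0, 0, 0, 1, 3, 6, 10 }    _37 = { 1, 3, 6, 10, 15, 21, 28, 36 }

   so _37 is the inclusive prefix sum of _31; _38 then adds the carry-in
   loaded from D.2043 and _39 broadcasts its last lane.  */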
6751 enum machine_mode vec_mode = TYPE_MODE (vectype);
6752 optab optab = optab_for_tree_code (code, vectype, optab_default);
6753 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6754 goto fail;
6756 int units_log2 = scan_store_can_perm_p (vectype, *init);
6757 if (units_log2 == -1)
6758 goto fail;
6760 return true;
6764 /* Function vectorizable_scan_store.
6766 Helper of vectorizable_store; arguments are as for vectorizable_store.
6767 Handle only the transformation; the checking is done in check_scan_store. */
6769 static bool
6770 vectorizable_scan_store (vec_info *vinfo,
6771 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6772 gimple **vec_stmt, int ncopies)
6774 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6775 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6776 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6777 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6779 if (dump_enabled_p ())
6780 dump_printf_loc (MSG_NOTE, vect_location,
6781 "transform scan store. ncopies = %d\n", ncopies);
6783 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6784 tree rhs = gimple_assign_rhs1 (stmt);
6785 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6787 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6788 bool inscan_var_store
6789 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6791 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6793 use_operand_p use_p;
6794 imm_use_iterator iter;
6795 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6797 gimple *use_stmt = USE_STMT (use_p);
6798 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6799 continue;
6800 rhs = gimple_assign_lhs (use_stmt);
6801 break;
6805 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6806 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6807 if (code == POINTER_PLUS_EXPR)
6808 code = PLUS_EXPR;
6809 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6810 && commutative_tree_code (code));
6811 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6812 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6813 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6814 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6815 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6816 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6817 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6818 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6819 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6820 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6821 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6823 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6825 std::swap (rhs1, rhs2);
6826 std::swap (var1, var2);
6827 std::swap (load1_dr_info, load2_dr_info);
6830 tree *init = loop_vinfo->scan_map->get (var1);
6831 gcc_assert (init);
6833 unsigned HOST_WIDE_INT nunits;
6834 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6835 gcc_unreachable ();
6836 auto_vec<enum scan_store_kind, 16> use_whole_vector;
6837 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6838 gcc_assert (units_log2 > 0);
6839 auto_vec<tree, 16> perms;
6840 perms.quick_grow (units_log2 + 1);
6841 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6842 for (int i = 0; i <= units_log2; ++i)
6844 unsigned HOST_WIDE_INT j, k;
6845 vec_perm_builder sel (nunits, nunits, 1);
6846 sel.quick_grow (nunits);
6847 if (i == units_log2)
6848 for (j = 0; j < nunits; ++j)
6849 sel[j] = nunits - 1;
6850 else
6852 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6853 sel[j] = j;
6854 for (k = 0; j < nunits; ++j, ++k)
6855 sel[j] = nunits + k;
6857 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6858 if (!use_whole_vector.is_empty ()
6859 && use_whole_vector[i] != scan_store_kind_perm)
6861 if (zero_vec == NULL_TREE)
6862 zero_vec = build_zero_cst (vectype);
6863 if (masktype == NULL_TREE
6864 && use_whole_vector[i] == scan_store_kind_lshift_cond)
6865 masktype = truth_type_for (vectype);
6866 perms[i] = vect_gen_perm_mask_any (vectype, indices);
6868 else
6869 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
6872 tree vec_oprnd1 = NULL_TREE;
6873 tree vec_oprnd2 = NULL_TREE;
6874 tree vec_oprnd3 = NULL_TREE;
6875 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
6876 tree dataref_offset = build_int_cst (ref_type, 0);
6877 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
6878 vectype, VMAT_CONTIGUOUS);
6879 tree ldataref_ptr = NULL_TREE;
6880 tree orig = NULL_TREE;
6881 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6882 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
6883 auto_vec<tree> vec_oprnds1;
6884 auto_vec<tree> vec_oprnds2;
6885 auto_vec<tree> vec_oprnds3;
6886 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
6887 *init, &vec_oprnds1,
6888 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
6889 rhs2, &vec_oprnds3);
6890 for (int j = 0; j < ncopies; j++)
6892 vec_oprnd1 = vec_oprnds1[j];
6893 if (ldataref_ptr == NULL)
6894 vec_oprnd2 = vec_oprnds2[j];
6895 vec_oprnd3 = vec_oprnds3[j];
6896 if (j == 0)
6897 orig = vec_oprnd3;
6898 else if (!inscan_var_store)
6899 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6901 if (ldataref_ptr)
6903 vec_oprnd2 = make_ssa_name (vectype);
6904 tree data_ref = fold_build2 (MEM_REF, vectype,
6905 unshare_expr (ldataref_ptr),
6906 dataref_offset);
6907 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
6908 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
6909 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6910 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6911 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6914 tree v = vec_oprnd2;
6915 for (int i = 0; i < units_log2; ++i)
6917 tree new_temp = make_ssa_name (vectype);
6918 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
6919 (zero_vec
6920 && (use_whole_vector[i]
6921 != scan_store_kind_perm))
6922 ? zero_vec : vec_oprnd1, v,
6923 perms[i]);
6924 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6925 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6926 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6928 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
6930 /* The whole-vector shift shifted in zeros, but if *init is
6931 not initializer_zerop, we need to replace those zero elements
6932 with elements from vec_oprnd1. */
6933 tree_vector_builder vb (masktype, nunits, 1);
6934 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
6935 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
6936 ? boolean_false_node : boolean_true_node);
6938 tree new_temp2 = make_ssa_name (vectype);
6939 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
6940 new_temp, vec_oprnd1);
6941 vect_finish_stmt_generation (vinfo, stmt_info,
6942 g, gsi);
6943 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6944 new_temp = new_temp2;
6947 /* For exclusive scan, perform the perms[i] permutation once
6948 more. */
6949 if (i == 0
6950 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
6951 && v == vec_oprnd2)
6953 v = new_temp;
6954 --i;
6955 continue;
6958 tree new_temp2 = make_ssa_name (vectype);
6959 g = gimple_build_assign (new_temp2, code, v, new_temp);
6960 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6961 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6963 v = new_temp2;
6966 tree new_temp = make_ssa_name (vectype);
6967 gimple *g = gimple_build_assign (new_temp, code, orig, v);
6968 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6969 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6971 tree last_perm_arg = new_temp;
6972 /* For exclusive scan, the new_temp computed above is the exclusive
6973 prefix sum. Turn it into an inclusive prefix sum before the
6974 broadcast of the last element into orig. */
6975 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6977 last_perm_arg = make_ssa_name (vectype);
6978 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
6979 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6980 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6983 orig = make_ssa_name (vectype);
6984 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
6985 last_perm_arg, perms[units_log2]);
6986 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6987 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6989 if (!inscan_var_store)
6991 tree data_ref = fold_build2 (MEM_REF, vectype,
6992 unshare_expr (dataref_ptr),
6993 dataref_offset);
6994 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
6995 g = gimple_build_assign (data_ref, new_temp);
6996 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6997 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7001 if (inscan_var_store)
7002 for (int j = 0; j < ncopies; j++)
7004 if (j != 0)
7005 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7007 tree data_ref = fold_build2 (MEM_REF, vectype,
7008 unshare_expr (dataref_ptr),
7009 dataref_offset);
7010 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7011 gimple *g = gimple_build_assign (data_ref, orig);
7012 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7013 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7015 return true;
7019 /* Function vectorizable_store.
7021 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7022 that can be vectorized.
7023 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7024 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7025 Return true if STMT_INFO is vectorizable in this way. */
7027 static bool
7028 vectorizable_store (vec_info *vinfo,
7029 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7030 gimple **vec_stmt, slp_tree slp_node,
7031 stmt_vector_for_cost *cost_vec)
7033 tree data_ref;
7034 tree op;
7035 tree vec_oprnd = NULL_TREE;
7036 tree elem_type;
7037 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7038 class loop *loop = NULL;
7039 machine_mode vec_mode;
7040 tree dummy;
7041 enum vect_def_type rhs_dt = vect_unknown_def_type;
7042 enum vect_def_type mask_dt = vect_unknown_def_type;
7043 tree dataref_ptr = NULL_TREE;
7044 tree dataref_offset = NULL_TREE;
7045 gimple *ptr_incr = NULL;
7046 int ncopies;
7047 int j;
7048 stmt_vec_info first_stmt_info;
7049 bool grouped_store;
7050 unsigned int group_size, i;
7051 vec<tree> oprnds = vNULL;
7052 vec<tree> result_chain = vNULL;
7053 tree offset = NULL_TREE;
7054 vec<tree> vec_oprnds = vNULL;
7055 bool slp = (slp_node != NULL);
7056 unsigned int vec_num;
7057 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7058 tree aggr_type;
7059 gather_scatter_info gs_info;
7060 poly_uint64 vf;
7061 vec_load_store_type vls_type;
7062 tree ref_type;
7064 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7065 return false;
7067 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7068 && ! vec_stmt)
7069 return false;
7071 /* Is vectorizable store? */
7073 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7074 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7076 tree scalar_dest = gimple_assign_lhs (assign);
7077 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7078 && is_pattern_stmt_p (stmt_info))
7079 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7080 if (TREE_CODE (scalar_dest) != ARRAY_REF
7081 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7082 && TREE_CODE (scalar_dest) != INDIRECT_REF
7083 && TREE_CODE (scalar_dest) != COMPONENT_REF
7084 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7085 && TREE_CODE (scalar_dest) != REALPART_EXPR
7086 && TREE_CODE (scalar_dest) != MEM_REF)
7087 return false;
7089 else
7091 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7092 if (!call || !gimple_call_internal_p (call))
7093 return false;
7095 internal_fn ifn = gimple_call_internal_fn (call);
7096 if (!internal_store_fn_p (ifn))
7097 return false;
7099 if (slp_node != NULL)
7101 if (dump_enabled_p ())
7102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7103 "SLP of masked stores not supported.\n");
7104 return false;
7107 int mask_index = internal_fn_mask_index (ifn);
7108 if (mask_index >= 0)
7110 mask = gimple_call_arg (call, mask_index);
7111 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7112 &mask_vectype))
7113 return false;
7117 op = vect_get_store_rhs (stmt_info);
7119 /* Cannot have hybrid store SLP -- that would mean storing to the
7120 same location twice. */
7121 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7123 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7124 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7126 if (loop_vinfo)
7128 loop = LOOP_VINFO_LOOP (loop_vinfo);
7129 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7131 else
7132 vf = 1;
7134 /* Multiple types in SLP are handled by creating the appropriate number of
7135 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7136 case of SLP. */
7137 if (slp)
7138 ncopies = 1;
7139 else
7140 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7142 gcc_assert (ncopies >= 1);
7144 /* FORNOW. This restriction should be relaxed. */
7145 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7147 if (dump_enabled_p ())
7148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7149 "multiple types in nested loop.\n");
7150 return false;
7153 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7154 op, &rhs_dt, &rhs_vectype, &vls_type))
7155 return false;
7157 elem_type = TREE_TYPE (vectype);
7158 vec_mode = TYPE_MODE (vectype);
7160 if (!STMT_VINFO_DATA_REF (stmt_info))
7161 return false;
7163 vect_memory_access_type memory_access_type;
7164 enum dr_alignment_support alignment_support_scheme;
7165 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7166 ncopies, &memory_access_type,
7167 &alignment_support_scheme, &gs_info))
7168 return false;
7170 if (mask)
7172 if (memory_access_type == VMAT_CONTIGUOUS)
7174 if (!VECTOR_MODE_P (vec_mode)
7175 || !can_vec_mask_load_store_p (vec_mode,
7176 TYPE_MODE (mask_vectype), false))
7177 return false;
7179 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7180 && (memory_access_type != VMAT_GATHER_SCATTER
7181 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7183 if (dump_enabled_p ())
7184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7185 "unsupported access type for masked store.\n");
7186 return false;
7189 else
7191 /* FORNOW. In some cases we can vectorize even if the data-type is not supported
7192 (e.g. array initialization with 0). */
7193 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7194 return false;
7197 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7198 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7199 && memory_access_type != VMAT_GATHER_SCATTER
7200 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7201 if (grouped_store)
7203 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7204 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7205 group_size = DR_GROUP_SIZE (first_stmt_info);
7207 else
7209 first_stmt_info = stmt_info;
7210 first_dr_info = dr_info;
7211 group_size = vec_num = 1;
7214 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7216 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7217 memory_access_type))
7218 return false;
7221 if (!vec_stmt) /* transformation not required. */
7223 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7225 if (loop_vinfo
7226 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7227 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7228 group_size, memory_access_type,
7229 &gs_info, mask);
7231 if (slp_node
7232 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7233 vectype))
7235 if (dump_enabled_p ())
7236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7237 "incompatible vector types for invariants\n");
7238 return false;
7241 if (dump_enabled_p ()
7242 && memory_access_type != VMAT_ELEMENTWISE
7243 && memory_access_type != VMAT_GATHER_SCATTER
7244 && alignment_support_scheme != dr_aligned)
7245 dump_printf_loc (MSG_NOTE, vect_location,
7246 "Vectorizing an unaligned access.\n");
7248 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7249 vect_model_store_cost (vinfo, stmt_info, ncopies,
7250 memory_access_type, vls_type, slp_node, cost_vec);
7251 return true;
7253 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7255 /* Transform. */
7257 ensure_base_align (dr_info);
7259 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7261 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7262 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7263 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7264 tree ptr, var, scale, vec_mask;
7265 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7266 tree mask_halfvectype = mask_vectype;
7267 edge pe = loop_preheader_edge (loop);
7268 gimple_seq seq;
7269 basic_block new_bb;
7270 enum { NARROW, NONE, WIDEN } modifier;
7271 poly_uint64 scatter_off_nunits
7272 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7274 if (known_eq (nunits, scatter_off_nunits))
7275 modifier = NONE;
7276 else if (known_eq (nunits * 2, scatter_off_nunits))
7278 modifier = WIDEN;
7280 /* Currently gathers and scatters are only supported for
7281 fixed-length vectors. */
7282 unsigned int count = scatter_off_nunits.to_constant ();
7283 vec_perm_builder sel (count, count, 1);
7284 for (i = 0; i < (unsigned int) count; ++i)
7285 sel.quick_push (i | (count / 2));
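/* Purely as an illustration: for count == 8 the loop above builds
   { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. a mask selecting the high half of the
   offset vector, which is applied on the odd copies below.  */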
7287 vec_perm_indices indices (sel, 1, count);
7288 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7289 indices);
7290 gcc_assert (perm_mask != NULL_TREE);
7292 else if (known_eq (nunits, scatter_off_nunits * 2))
7294 modifier = NARROW;
7296 /* Currently gathers and scatters are only supported for
7297 fixed-length vectors. */
7298 unsigned int count = nunits.to_constant ();
7299 vec_perm_builder sel (count, count, 1);
7300 for (i = 0; i < (unsigned int) count; ++i)
7301 sel.quick_push (i | (count / 2));
7303 vec_perm_indices indices (sel, 2, count);
7304 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7305 gcc_assert (perm_mask != NULL_TREE);
7306 ncopies *= 2;
7308 if (mask)
7309 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7311 else
7312 gcc_unreachable ();
7314 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7315 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7316 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7317 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7318 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7319 scaletype = TREE_VALUE (arglist);
7321 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7322 && TREE_CODE (rettype) == VOID_TYPE);
7324 ptr = fold_convert (ptrtype, gs_info.base);
7325 if (!is_gimple_min_invariant (ptr))
7327 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7328 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7329 gcc_assert (!new_bb);
7332 if (mask == NULL_TREE)
7334 mask_arg = build_int_cst (masktype, -1);
7335 mask_arg = vect_init_vector (vinfo, stmt_info,
7336 mask_arg, masktype, NULL);
7339 scale = build_int_cst (scaletype, gs_info.scale);
7341 auto_vec<tree> vec_oprnds0;
7342 auto_vec<tree> vec_oprnds1;
7343 auto_vec<tree> vec_masks;
7344 if (mask)
7346 tree mask_vectype = truth_type_for (vectype);
7347 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7348 modifier == NARROW
7349 ? ncopies / 2 : ncopies,
7350 mask, &vec_masks, mask_vectype);
7352 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7353 modifier == WIDEN
7354 ? ncopies / 2 : ncopies,
7355 gs_info.offset, &vec_oprnds0);
7356 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7357 modifier == NARROW
7358 ? ncopies / 2 : ncopies,
7359 op, &vec_oprnds1);
7360 for (j = 0; j < ncopies; ++j)
7362 if (modifier == WIDEN)
7364 if (j & 1)
7365 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7366 perm_mask, stmt_info, gsi);
7367 else
7368 op = vec_oprnd0 = vec_oprnds0[j / 2];
7369 src = vec_oprnd1 = vec_oprnds1[j];
7370 if (mask)
7371 mask_op = vec_mask = vec_masks[j];
7373 else if (modifier == NARROW)
7375 if (j & 1)
7376 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7377 perm_mask, stmt_info, gsi);
7378 else
7379 src = vec_oprnd1 = vec_oprnds1[j / 2];
7380 op = vec_oprnd0 = vec_oprnds0[j];
7381 if (mask)
7382 mask_op = vec_mask = vec_masks[j / 2];
7384 else
7386 op = vec_oprnd0 = vec_oprnds0[j];
7387 src = vec_oprnd1 = vec_oprnds1[j];
7388 if (mask)
7389 mask_op = vec_mask = vec_masks[j];
7392 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7394 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7395 TYPE_VECTOR_SUBPARTS (srctype)));
7396 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7397 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7398 gassign *new_stmt
7399 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7400 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7401 src = var;
7404 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7406 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7407 TYPE_VECTOR_SUBPARTS (idxtype)));
7408 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7409 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7410 gassign *new_stmt
7411 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7412 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7413 op = var;
7416 if (mask)
7418 tree utype;
7419 mask_arg = mask_op;
7420 if (modifier == NARROW)
7422 var = vect_get_new_ssa_name (mask_halfvectype,
7423 vect_simple_var);
7424 gassign *new_stmt
7425 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7426 : VEC_UNPACK_LO_EXPR,
7427 mask_op);
7428 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7429 mask_arg = var;
7431 tree optype = TREE_TYPE (mask_arg);
7432 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7433 utype = masktype;
7434 else
7435 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7436 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7437 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7438 gassign *new_stmt
7439 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7440 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7441 mask_arg = var;
7442 if (!useless_type_conversion_p (masktype, utype))
7444 gcc_assert (TYPE_PRECISION (utype)
7445 <= TYPE_PRECISION (masktype));
7446 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7447 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7448 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7449 mask_arg = var;
7453 gcall *new_stmt
7454 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7455 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7457 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7459 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7460 return true;
7462 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7463 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7465 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7466 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7468 if (grouped_store)
7470 /* FORNOW */
7471 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7473 /* We vectorize all the stmts of the interleaving group when we
7474 reach the last stmt in the group. */
7475 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7476 < DR_GROUP_SIZE (first_stmt_info)
7477 && !slp)
7479 *vec_stmt = NULL;
7480 return true;
7483 if (slp)
7485 grouped_store = false;
7486 /* VEC_NUM is the number of vect stmts to be created for this
7487 group. */
7488 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7489 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7490 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7491 == first_stmt_info);
7492 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7493 op = vect_get_store_rhs (first_stmt_info);
7495 else
7496 /* VEC_NUM is the number of vect stmts to be created for this
7497 group. */
7498 vec_num = group_size;
7500 ref_type = get_group_alias_ptr_type (first_stmt_info);
7502 else
7503 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7505 if (dump_enabled_p ())
7506 dump_printf_loc (MSG_NOTE, vect_location,
7507 "transform store. ncopies = %d\n", ncopies);
7509 if (memory_access_type == VMAT_ELEMENTWISE
7510 || memory_access_type == VMAT_STRIDED_SLP)
7512 gimple_stmt_iterator incr_gsi;
7513 bool insert_after;
7514 gimple *incr;
7515 tree offvar;
7516 tree ivstep;
7517 tree running_off;
7518 tree stride_base, stride_step, alias_off;
7519 tree vec_oprnd;
7520 tree dr_offset;
7521 unsigned int g;
7522 /* Checked by get_load_store_type. */
7523 unsigned int const_nunits = nunits.to_constant ();
7525 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7526 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7528 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7529 stride_base
7530 = fold_build_pointer_plus
7531 (DR_BASE_ADDRESS (first_dr_info->dr),
7532 size_binop (PLUS_EXPR,
7533 convert_to_ptrofftype (dr_offset),
7534 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7535 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7537 /* For a store with loop-invariant (but other than power-of-2)
7538 stride (i.e. not a grouped access) like so:
7540 for (i = 0; i < n; i += stride)
7541 array[i] = ...;
7543 we generate a new induction variable and new stores from
7544 the components of the (vectorized) rhs:
7546 for (j = 0; ; j += VF*stride)
7547 vectemp = ...;
7548 tmp1 = vectemp[0];
7549 array[j] = tmp1;
7550 tmp2 = vectemp[1];
7551 array[j + stride] = tmp2;
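	 As a purely illustrative instance (assuming VF == 4, a 4-element
	 vectype and stride == 3, numbers not taken from the sources), each
	 iteration stores vectemp[0..3] to array[j], array[j + 3],
	 array[j + 6] and array[j + 9], with j advancing by 12.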
7555 unsigned nstores = const_nunits;
7556 unsigned lnel = 1;
7557 tree ltype = elem_type;
7558 tree lvectype = vectype;
7559 if (slp)
7561 if (group_size < const_nunits
7562 && const_nunits % group_size == 0)
7564 nstores = const_nunits / group_size;
7565 lnel = group_size;
7566 ltype = build_vector_type (elem_type, group_size);
7567 lvectype = vectype;
7569 /* First check if vec_extract optab doesn't support extraction
7570 of vector elts directly. */
7571 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7572 machine_mode vmode;
7573 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7574 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7575 group_size).exists (&vmode)
7576 || (convert_optab_handler (vec_extract_optab,
7577 TYPE_MODE (vectype), vmode)
7578 == CODE_FOR_nothing))
7580 /* Try to avoid emitting an extract of vector elements
7581 by performing the extracts using an integer type of the
7582 same size, extracting from a vector of those and then
7583 re-interpreting it as the original vector type if
7584 supported. */
7585 unsigned lsize
7586 = group_size * GET_MODE_BITSIZE (elmode);
7587 unsigned int lnunits = const_nunits / group_size;
7588 /* If we can't construct such a vector, fall back to
7589 element extracts from the original vector type and
7590 element-size stores. */
7591 if (int_mode_for_size (lsize, 0).exists (&elmode)
7592 && VECTOR_MODE_P (TYPE_MODE (vectype))
7593 && related_vector_mode (TYPE_MODE (vectype), elmode,
7594 lnunits).exists (&vmode)
7595 && (convert_optab_handler (vec_extract_optab,
7596 vmode, elmode)
7597 != CODE_FOR_nothing))
7599 nstores = lnunits;
7600 lnel = group_size;
7601 ltype = build_nonstandard_integer_type (lsize, 1);
7602 lvectype = build_vector_type (ltype, nstores);
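/* As a hypothetical example: storing groups of two SImode elements out of a
   4-element vector gives LSIZE == 64, so LTYPE becomes a 64-bit integer type
   and LVECTYPE a 2-element vector of those; each scalar store below then
   writes one 64-bit chunk instead of two 32-bit elements.  */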
7604 /* Else fall back to vector extraction anyway.
7605 Fewer stores are more important than avoiding spilling
7606 of the vector we extract from. Compared to the
7607 construction case in vectorizable_load no store-forwarding
7608 issue exists here for reasonable archs. */
7611 else if (group_size >= const_nunits
7612 && group_size % const_nunits == 0)
7614 nstores = 1;
7615 lnel = const_nunits;
7616 ltype = vectype;
7617 lvectype = vectype;
7619 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7620 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7623 ivstep = stride_step;
7624 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7625 build_int_cst (TREE_TYPE (ivstep), vf));
7627 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7629 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7630 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7631 create_iv (stride_base, ivstep, NULL,
7632 loop, &incr_gsi, insert_after,
7633 &offvar, NULL);
7634 incr = gsi_stmt (incr_gsi);
7636 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7638 alias_off = build_int_cst (ref_type, 0);
7639 stmt_vec_info next_stmt_info = first_stmt_info;
7640 for (g = 0; g < group_size; g++)
7642 running_off = offvar;
7643 if (g)
7645 tree size = TYPE_SIZE_UNIT (ltype);
7646 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7647 size);
7648 tree newoff = copy_ssa_name (running_off, NULL);
7649 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7650 running_off, pos);
7651 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7652 running_off = newoff;
7654 if (!slp)
7655 op = vect_get_store_rhs (next_stmt_info);
7656 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7657 op, &vec_oprnds);
7658 unsigned int group_el = 0;
7659 unsigned HOST_WIDE_INT
7660 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7661 for (j = 0; j < ncopies; j++)
7663 vec_oprnd = vec_oprnds[j];
7664 /* Pun the vector to extract from if necessary. */
7665 if (lvectype != vectype)
7667 tree tem = make_ssa_name (lvectype);
7668 gimple *pun
7669 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7670 lvectype, vec_oprnd));
7671 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7672 vec_oprnd = tem;
7674 for (i = 0; i < nstores; i++)
7676 tree newref, newoff;
7677 gimple *incr, *assign;
7678 tree size = TYPE_SIZE (ltype);
7679 /* Extract the i'th component. */
7680 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7681 bitsize_int (i), size);
7682 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7683 size, pos);
7685 elem = force_gimple_operand_gsi (gsi, elem, true,
7686 NULL_TREE, true,
7687 GSI_SAME_STMT);
7689 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7690 group_el * elsz);
7691 newref = build2 (MEM_REF, ltype,
7692 running_off, this_off);
7693 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7695 /* And store it to *running_off. */
7696 assign = gimple_build_assign (newref, elem);
7697 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7699 group_el += lnel;
7700 if (! slp
7701 || group_el == group_size)
7703 newoff = copy_ssa_name (running_off, NULL);
7704 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7705 running_off, stride_step);
7706 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7708 running_off = newoff;
7709 group_el = 0;
7711 if (g == group_size - 1
7712 && !slp)
7714 if (j == 0 && i == 0)
7715 *vec_stmt = assign;
7716 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7720 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7721 if (slp)
7722 break;
7725 vec_oprnds.release ();
7726 return true;
7729 auto_vec<tree> dr_chain (group_size);
7730 oprnds.create (group_size);
7732 /* Gather-scatter accesses perform only component accesses, so alignment
7733 is irrelevant for them. */
7734 if (memory_access_type == VMAT_GATHER_SCATTER)
7735 alignment_support_scheme = dr_unaligned_supported;
7736 else
7737 alignment_support_scheme
7738 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7740 gcc_assert (alignment_support_scheme);
7741 vec_loop_masks *loop_masks
7742 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7743 ? &LOOP_VINFO_MASKS (loop_vinfo)
7744 : NULL);
7745 vec_loop_lens *loop_lens
7746 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7747 ? &LOOP_VINFO_LENS (loop_vinfo)
7748 : NULL);
7750 /* We shouldn't use the length-based approach if the loop is fully masked. */
7751 gcc_assert (!loop_lens || !loop_masks);
7753 /* Targets with store-lane instructions must not require explicit
7754 realignment. vect_supportable_dr_alignment always returns either
7755 dr_aligned or dr_unaligned_supported for masked operations. */
7756 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7757 && !mask
7758 && !loop_masks)
7759 || alignment_support_scheme == dr_aligned
7760 || alignment_support_scheme == dr_unaligned_supported);
7762 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7763 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7764 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
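/* Illustrative note: for the downward/reverse contiguous cases the access
   runs toward lower addresses, so the data pointer is adjusted by
   -(nunits - 1) elements (e.g. -3 for an assumed 4-element vector) so the
   vector access covers the intended range.  */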
7766 tree bump;
7767 tree vec_offset = NULL_TREE;
7768 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7770 aggr_type = NULL_TREE;
7771 bump = NULL_TREE;
7773 else if (memory_access_type == VMAT_GATHER_SCATTER)
7775 aggr_type = elem_type;
7776 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7777 &bump, &vec_offset);
7779 else
7781 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7782 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7783 else
7784 aggr_type = vectype;
7785 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7786 memory_access_type);
7789 if (mask)
7790 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7792 /* In case the vectorization factor (VF) is bigger than the number
7793 of elements that we can fit in a vectype (nunits), we have to generate
7794 more than one vector stmt, i.e. we need to "unroll" the
7795 vector stmt by a factor of VF/nunits. */
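/* As an illustration (numbers assumed, not from the sources): with VF == 8
   and a 4-element vectype, NCOPIES is 2 and the loop over J below emits two
   vector stores per scalar store.  */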
7797 /* In case of interleaving (non-unit grouped access):
7799 S1: &base + 2 = x2
7800 S2: &base = x0
7801 S3: &base + 1 = x1
7802 S4: &base + 3 = x3
7804 We create vectorized stores starting from base address (the access of the
7805 first stmt in the chain (S2 in the above example), when the last store stmt
7806 of the chain (S4) is reached:
7808 VS1: &base = vx2
7809 VS2: &base + vec_size*1 = vx0
7810 VS3: &base + vec_size*2 = vx1
7811 VS4: &base + vec_size*3 = vx3
7813 Then permutation statements are generated:
7815 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7816 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7819 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7820 (the order of the data-refs in the output of vect_permute_store_chain
7821 corresponds to the order of scalar stmts in the interleaving chain - see
7822 the documentation of vect_permute_store_chain()).
7824 In case of both multiple types and interleaving, above vector stores and
7825 permutation stmts are created for every copy. The result vector stmts are
7826 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7827 STMT_VINFO_RELATED_STMT for the next copies.
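	 As a purely illustrative reading of VS5 above: if vx0 is
	 { a0, ..., a7 } and vx3 is { b0, ..., b7 }, the mask
	 { 0, 8, 1, 9, 2, 10, 3, 11 } selects { a0, b0, a1, b1, a2, b2, a3, b3 },
	 i.e. the low halves of the two inputs interleaved.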
7830 auto_vec<tree> vec_masks;
7831 tree vec_mask = NULL;
7832 auto_vec<tree> vec_offsets;
7833 auto_vec<vec<tree> > gvec_oprnds;
7834 gvec_oprnds.safe_grow_cleared (group_size);
7835 for (j = 0; j < ncopies; j++)
7837 gimple *new_stmt;
7838 if (j == 0)
7840 if (slp)
7842 /* Get vectorized arguments for SLP_NODE. */
7843 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
7844 op, &vec_oprnds);
7845 vec_oprnd = vec_oprnds[0];
7847 else
7849 /* For interleaved stores we collect vectorized defs for all the
7850 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7851 used as an input to vect_permute_store_chain().
7853 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7854 and OPRNDS are of size 1. */
7855 stmt_vec_info next_stmt_info = first_stmt_info;
7856 for (i = 0; i < group_size; i++)
7858 /* Since gaps are not supported for interleaved stores,
7859 DR_GROUP_SIZE is the exact number of stmts in the chain.
7860 Therefore, NEXT_STMT_INFO can't be NULL_TREE. If there
7861 is no interleaving, DR_GROUP_SIZE is 1,
7862 and only one iteration of the loop will be executed. */
7863 op = vect_get_store_rhs (next_stmt_info);
7864 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
7865 ncopies, op, &gvec_oprnds[i]);
7866 vec_oprnd = gvec_oprnds[i][0];
7867 dr_chain.quick_push (gvec_oprnds[i][0]);
7868 oprnds.quick_push (gvec_oprnds[i][0]);
7869 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7871 if (mask)
7873 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
7874 mask, &vec_masks, mask_vectype);
7875 vec_mask = vec_masks[0];
7879 /* We should have caught mismatched types earlier. */
7880 gcc_assert (useless_type_conversion_p (vectype,
7881 TREE_TYPE (vec_oprnd)));
7882 bool simd_lane_access_p
7883 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
7884 if (simd_lane_access_p
7885 && !loop_masks
7886 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7887 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7888 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
7889 && integer_zerop (DR_INIT (first_dr_info->dr))
7890 && alias_sets_conflict_p (get_alias_set (aggr_type),
7891 get_alias_set (TREE_TYPE (ref_type))))
7893 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7894 dataref_offset = build_int_cst (ref_type, 0);
7896 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7898 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
7899 &dataref_ptr, &vec_offsets, ncopies);
7900 vec_offset = vec_offsets[0];
7902 else
7903 dataref_ptr
7904 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
7905 simd_lane_access_p ? loop : NULL,
7906 offset, &dummy, gsi, &ptr_incr,
7907 simd_lane_access_p, NULL_TREE, bump);
7909 else
7911 /* For interleaved stores we created vectorized defs for all the
7912 defs stored in OPRNDS in the previous iteration (previous copy).
7913 DR_CHAIN is then used as an input to vect_permute_store_chain().
7914 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7915 OPRNDS are of size 1. */
7916 for (i = 0; i < group_size; i++)
7918 vec_oprnd = gvec_oprnds[i][j];
7919 dr_chain[i] = gvec_oprnds[i][j];
7920 oprnds[i] = gvec_oprnds[i][j];
7922 if (mask)
7923 vec_mask = vec_masks[j];
7924 if (dataref_offset)
7925 dataref_offset
7926 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7927 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7928 vec_offset = vec_offsets[j];
7929 else
7930 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
7931 stmt_info, bump);
7934 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7936 tree vec_array;
7938 /* Get an array into which we can store the individual vectors. */
7939 vec_array = create_vector_array (vectype, vec_num);
7941 /* Invalidate the current contents of VEC_ARRAY. This should
7942 become an RTL clobber too, which prevents the vector registers
7943 from being upward-exposed. */
7944 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7946 /* Store the individual vectors into the array. */
7947 for (i = 0; i < vec_num; i++)
7949 vec_oprnd = dr_chain[i];
7950 write_vector_array (vinfo, stmt_info,
7951 gsi, vec_oprnd, vec_array, i);
7954 tree final_mask = NULL;
7955 if (loop_masks)
7956 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7957 vectype, j);
7958 if (vec_mask)
7959 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7960 vec_mask, gsi);
7962 gcall *call;
7963 if (final_mask)
7965 /* Emit:
7966 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7967 VEC_ARRAY). */
7968 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7969 tree alias_ptr = build_int_cst (ref_type, align);
7970 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7971 dataref_ptr, alias_ptr,
7972 final_mask, vec_array);
7974 else
7976 /* Emit:
7977 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7978 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7979 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7980 vec_array);
7981 gimple_call_set_lhs (call, data_ref);
7983 gimple_call_set_nothrow (call, true);
7984 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7985 new_stmt = call;
7987 /* Record that VEC_ARRAY is now dead. */
7988 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7990 else
7992 new_stmt = NULL;
7993 if (grouped_store)
7995 if (j == 0)
7996 result_chain.create (group_size);
7997 /* Permute. */
7998 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
7999 gsi, &result_chain);
8002 stmt_vec_info next_stmt_info = first_stmt_info;
8003 for (i = 0; i < vec_num; i++)
8005 unsigned misalign;
8006 unsigned HOST_WIDE_INT align;
8008 tree final_mask = NULL_TREE;
8009 if (loop_masks)
8010 final_mask = vect_get_loop_mask (gsi, loop_masks,
8011 vec_num * ncopies,
8012 vectype, vec_num * j + i);
8013 if (vec_mask)
8014 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8015 vec_mask, gsi);
8017 if (memory_access_type == VMAT_GATHER_SCATTER)
8019 tree scale = size_int (gs_info.scale);
8020 gcall *call;
8021 if (loop_masks)
8022 call = gimple_build_call_internal
8023 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8024 scale, vec_oprnd, final_mask);
8025 else
8026 call = gimple_build_call_internal
8027 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8028 scale, vec_oprnd);
8029 gimple_call_set_nothrow (call, true);
8030 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8031 new_stmt = call;
8032 break;
8035 if (i > 0)
8036 /* Bump the vector pointer. */
8037 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8038 gsi, stmt_info, bump);
8040 if (slp)
8041 vec_oprnd = vec_oprnds[i];
8042 else if (grouped_store)
8043 /* For grouped stores vectorized defs are interleaved in
8044 vect_permute_store_chain(). */
8045 vec_oprnd = result_chain[i];
8047 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8048 if (aligned_access_p (first_dr_info))
8049 misalign = 0;
8050 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8052 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8053 misalign = 0;
8055 else
8056 misalign = DR_MISALIGNMENT (first_dr_info);
8057 if (dataref_offset == NULL_TREE
8058 && TREE_CODE (dataref_ptr) == SSA_NAME)
8059 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8060 misalign);
8062 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8064 tree perm_mask = perm_mask_for_reverse (vectype);
8065 tree perm_dest = vect_create_destination_var
8066 (vect_get_store_rhs (stmt_info), vectype);
8067 tree new_temp = make_ssa_name (perm_dest);
8069 /* Generate the permute statement. */
8070 gimple *perm_stmt
8071 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8072 vec_oprnd, perm_mask);
8073 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8075 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8076 vec_oprnd = new_temp;
8079 /* Arguments are ready. Create the new vector stmt. */
8080 if (final_mask)
8082 align = least_bit_hwi (misalign | align);
8083 tree ptr = build_int_cst (ref_type, align);
8084 gcall *call
8085 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8086 dataref_ptr, ptr,
8087 final_mask, vec_oprnd);
8088 gimple_call_set_nothrow (call, true);
8089 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8090 new_stmt = call;
8092 else if (loop_lens)
8094 tree final_len
8095 = vect_get_loop_len (loop_vinfo, loop_lens,
8096 vec_num * ncopies, vec_num * j + i);
8097 align = least_bit_hwi (misalign | align);
8098 tree ptr = build_int_cst (ref_type, align);
8099 machine_mode vmode = TYPE_MODE (vectype);
8100 opt_machine_mode new_ovmode
8101 = get_len_load_store_mode (vmode, false);
8102 machine_mode new_vmode = new_ovmode.require ();
8103 /* Need conversion if it's wrapped with VnQI. */
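/* Hypothetical example: if the target's length-based store only exists in a
   QImode-element (VnQI) form, a V4SI operand is first VIEW_CONVERTed to a
   16-byte unsigned char vector before the IFN_LEN_STORE call below.  */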
8104 if (vmode != new_vmode)
8106 tree new_vtype
8107 = build_vector_type_for_mode (unsigned_intQI_type_node,
8108 new_vmode);
8109 tree var
8110 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8111 vec_oprnd
8112 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8113 gassign *new_stmt
8114 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8115 vec_oprnd);
8116 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8117 gsi);
8118 vec_oprnd = var;
8120 gcall *call
8121 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8122 ptr, final_len, vec_oprnd);
8123 gimple_call_set_nothrow (call, true);
8124 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8125 new_stmt = call;
8127 else
8129 data_ref = fold_build2 (MEM_REF, vectype,
8130 dataref_ptr,
8131 dataref_offset
8132 ? dataref_offset
8133 : build_int_cst (ref_type, 0));
8134 if (aligned_access_p (first_dr_info))
8136 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8137 TREE_TYPE (data_ref)
8138 = build_aligned_type (TREE_TYPE (data_ref),
8139 align * BITS_PER_UNIT);
8140 else
8141 TREE_TYPE (data_ref)
8142 = build_aligned_type (TREE_TYPE (data_ref),
8143 TYPE_ALIGN (elem_type));
8144 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8145 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8146 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8149 if (slp)
8150 continue;
8152 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8153 if (!next_stmt_info)
8154 break;
8157 if (!slp)
8159 if (j == 0)
8160 *vec_stmt = new_stmt;
8161 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8165 for (i = 0; i < group_size; ++i)
8167 vec<tree> oprndsi = gvec_oprnds[i];
8168 oprndsi.release ();
8170 oprnds.release ();
8171 result_chain.release ();
8172 vec_oprnds.release ();
8174 return true;
8177 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8178 VECTOR_CST mask. No checks are made that the target platform supports the
8179 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8180 vect_gen_perm_mask_checked. */
8182 tree
8183 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8185 tree mask_type;
8187 poly_uint64 nunits = sel.length ();
8188 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8190 mask_type = build_vector_type (ssizetype, nunits);
8191 return vec_perm_indices_to_tree (mask_type, sel);
8194 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8195 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8197 tree
8198 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8200 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8201 return vect_gen_perm_mask_any (vectype, sel);
8204 /* Given vector variables X and Y that were generated for the scalar
8205 STMT_INFO, generate instructions to permute the vector elements of X and Y
8206 using permutation mask MASK_VEC, insert them at *GSI and return the
8207 permuted vector variable. */
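/* For example (illustrative values only): with X = { x0, x1, x2, x3 },
   Y = { y0, y1, y2, y3 } and MASK_VEC = { 0, 4, 1, 5 }, the generated
   VEC_PERM_EXPR yields { x0, y0, x1, y1 }.  */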
8209 static tree
8210 permute_vec_elements (vec_info *vinfo,
8211 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8212 gimple_stmt_iterator *gsi)
8214 tree vectype = TREE_TYPE (x);
8215 tree perm_dest, data_ref;
8216 gimple *perm_stmt;
8218 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8219 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8220 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8221 else
8222 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8223 data_ref = make_ssa_name (perm_dest);
8225 /* Generate the permute statement. */
8226 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8227 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8229 return data_ref;
8232 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8233 inserting them on the loop's preheader edge. Returns true if we
8234 were successful in doing so (and thus STMT_INFO can then be moved),
8235 otherwise returns false. */
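/* Illustrative example: if STMT_INFO loads *p and p is defined inside LOOP
   purely from values defined outside of it, p's definition is moved to the
   preheader edge so that the caller can then hoist the load itself.  */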
8237 static bool
8238 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8240 ssa_op_iter i;
8241 tree op;
8242 bool any = false;
8244 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8246 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8247 if (!gimple_nop_p (def_stmt)
8248 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8250 /* Make sure we don't need to recurse. While we could do
8251 so in simple cases, when there are more complex use webs
8252 we don't have an easy way to preserve stmt order to fulfil
8253 dependencies within them. */
8254 tree op2;
8255 ssa_op_iter i2;
8256 if (gimple_code (def_stmt) == GIMPLE_PHI)
8257 return false;
8258 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8260 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8261 if (!gimple_nop_p (def_stmt2)
8262 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8263 return false;
8265 any = true;
8269 if (!any)
8270 return true;
8272 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8274 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8275 if (!gimple_nop_p (def_stmt)
8276 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8278 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8279 gsi_remove (&gsi, false);
8280 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8284 return true;
8287 /* vectorizable_load.
8289 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8290 that can be vectorized.
8291 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8292 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8293 Return true if STMT_INFO is vectorizable in this way. */
8295 static bool
8296 vectorizable_load (vec_info *vinfo,
8297 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8298 gimple **vec_stmt, slp_tree slp_node,
8299 stmt_vector_for_cost *cost_vec)
8301 tree scalar_dest;
8302 tree vec_dest = NULL;
8303 tree data_ref = NULL;
8304 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8305 class loop *loop = NULL;
8306 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8307 bool nested_in_vect_loop = false;
8308 tree elem_type;
8309 tree new_temp;
8310 machine_mode mode;
8311 tree dummy;
8312 tree dataref_ptr = NULL_TREE;
8313 tree dataref_offset = NULL_TREE;
8314 gimple *ptr_incr = NULL;
8315 int ncopies;
8316 int i, j;
8317 unsigned int group_size;
8318 poly_uint64 group_gap_adj;
8319 tree msq = NULL_TREE, lsq;
8320 tree offset = NULL_TREE;
8321 tree byte_offset = NULL_TREE;
8322 tree realignment_token = NULL_TREE;
8323 gphi *phi = NULL;
8324 vec<tree> dr_chain = vNULL;
8325 bool grouped_load = false;
8326 stmt_vec_info first_stmt_info;
8327 stmt_vec_info first_stmt_info_for_drptr = NULL;
8328 bool compute_in_loop = false;
8329 class loop *at_loop;
8330 int vec_num;
8331 bool slp = (slp_node != NULL);
8332 bool slp_perm = false;
8333 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8334 poly_uint64 vf;
8335 tree aggr_type;
8336 gather_scatter_info gs_info;
8337 tree ref_type;
8338 enum vect_def_type mask_dt = vect_unknown_def_type;
8340 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8341 return false;
8343 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8344 && ! vec_stmt)
8345 return false;
8347 if (!STMT_VINFO_DATA_REF (stmt_info))
8348 return false;
8350 /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8351 for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE
8352 which can be different when reduction chains were re-ordered.
8353 Now that we have figured out that we're a dataref, reset stmt_info back to
8354 SLP_TREE_SCALAR_STMTS[0]. When we're SLP-only, things should be
8355 refactored in a way to maintain the dr_vec_info pointer for the
8356 relevant access explicitly. */
8357 stmt_vec_info orig_stmt_info = stmt_info;
8358 if (slp_node)
8359 stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8361 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8362 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8364 scalar_dest = gimple_assign_lhs (assign);
8365 if (TREE_CODE (scalar_dest) != SSA_NAME)
8366 return false;
8368 tree_code code = gimple_assign_rhs_code (assign);
8369 if (code != ARRAY_REF
8370 && code != BIT_FIELD_REF
8371 && code != INDIRECT_REF
8372 && code != COMPONENT_REF
8373 && code != IMAGPART_EXPR
8374 && code != REALPART_EXPR
8375 && code != MEM_REF
8376 && TREE_CODE_CLASS (code) != tcc_declaration)
8377 return false;
8379 else
8381 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8382 if (!call || !gimple_call_internal_p (call))
8383 return false;
8385 internal_fn ifn = gimple_call_internal_fn (call);
8386 if (!internal_load_fn_p (ifn))
8387 return false;
8389 scalar_dest = gimple_call_lhs (call);
8390 if (!scalar_dest)
8391 return false;
8393 int mask_index = internal_fn_mask_index (ifn);
8394 if (mask_index >= 0)
8396 mask = gimple_call_arg (call, mask_index);
8397 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8398 &mask_vectype))
8399 return false;
8403 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8404 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8406 if (loop_vinfo)
8408 loop = LOOP_VINFO_LOOP (loop_vinfo);
8409 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8410 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8412 else
8413 vf = 1;
8415 /* Multiple types in SLP are handled by creating the appropriate number of
8416 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8417 case of SLP. */
8418 if (slp)
8419 ncopies = 1;
8420 else
8421 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8423 gcc_assert (ncopies >= 1);
8425 /* FORNOW. This restriction should be relaxed. */
8426 if (nested_in_vect_loop && ncopies > 1)
8428 if (dump_enabled_p ())
8429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8430 "multiple types in nested loop.\n");
8431 return false;
8434 /* Invalidate assumptions made by dependence analysis when vectorization
8435 on the unrolled body effectively re-orders stmts. */
8436 if (ncopies > 1
8437 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8438 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8439 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8441 if (dump_enabled_p ())
8442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8443 "cannot perform implicit CSE when unrolling "
8444 "with negative dependence distance\n");
8445 return false;
8448 elem_type = TREE_TYPE (vectype);
8449 mode = TYPE_MODE (vectype);
8451 /* FORNOW. In some cases we can vectorize even if the data-type is not supported
8452 (e.g. data copies). */
8453 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8455 if (dump_enabled_p ())
8456 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8457 "Aligned load, but unsupported type.\n");
8458 return false;
8461 /* Check if the load is a part of an interleaving chain. */
8462 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8464 grouped_load = true;
8465 /* FORNOW */
8466 gcc_assert (!nested_in_vect_loop);
8467 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8469 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8470 group_size = DR_GROUP_SIZE (first_stmt_info);
8472 /* Refuse non-SLP vectorization of SLP-only groups. */
8473 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8475 if (dump_enabled_p ())
8476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8477 "cannot vectorize load in non-SLP mode.\n");
8478 return false;
8481 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8483 slp_perm = true;
8485 if (!loop_vinfo)
8487 /* In BB vectorization we may not actually use a loaded vector
8488 accessing elements in excess of DR_GROUP_SIZE. */
8489 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8490 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8491 unsigned HOST_WIDE_INT nunits;
8492 unsigned j, k, maxk = 0;
8493 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8494 if (k > maxk)
8495 maxk = k;
8496 tree vectype = STMT_VINFO_VECTYPE (group_info);
8497 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8498 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8500 if (dump_enabled_p ())
8501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8502 "BB vectorization with gaps at the end of "
8503 "a load is not supported\n");
8504 return false;
8508 auto_vec<tree> tem;
8509 unsigned n_perms;
8510 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8511 true, &n_perms))
8513 if (dump_enabled_p ())
8514 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8515 vect_location,
8516 "unsupported load permutation\n");
8517 return false;
8521 /* Invalidate assumptions made by dependence analysis when vectorization
8522 on the unrolled body effectively re-orders stmts. */
8523 if (!PURE_SLP_STMT (stmt_info)
8524 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8525 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8526 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8528 if (dump_enabled_p ())
8529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8530 "cannot perform implicit CSE when performing "
8531 "group loads with negative dependence distance\n");
8532 return false;
8535 else
8536 group_size = 1;
8538 vect_memory_access_type memory_access_type;
8539 enum dr_alignment_support alignment_support_scheme;
8540 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8541 ncopies, &memory_access_type,
8542 &alignment_support_scheme, &gs_info))
8543 return false;
8545 if (mask)
8547 if (memory_access_type == VMAT_CONTIGUOUS)
8549 machine_mode vec_mode = TYPE_MODE (vectype);
8550 if (!VECTOR_MODE_P (vec_mode)
8551 || !can_vec_mask_load_store_p (vec_mode,
8552 TYPE_MODE (mask_vectype), true))
8553 return false;
8555 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8556 && memory_access_type != VMAT_GATHER_SCATTER)
8558 if (dump_enabled_p ())
8559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8560 "unsupported access type for masked load.\n");
8561 return false;
8565 if (!vec_stmt) /* transformation not required. */
8567 if (!slp)
8568 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8570 if (loop_vinfo
8571 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8572 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8573 group_size, memory_access_type,
8574 &gs_info, mask);
8576 if (dump_enabled_p ()
8577 && memory_access_type != VMAT_ELEMENTWISE
8578 && memory_access_type != VMAT_GATHER_SCATTER
8579 && alignment_support_scheme != dr_aligned)
8580 dump_printf_loc (MSG_NOTE, vect_location,
8581 "Vectorizing an unaligned access.\n");
8583 STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8584 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8585 slp_node, cost_vec);
8586 return true;
8589 if (!slp)
8590 gcc_assert (memory_access_type
8591 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8593 if (dump_enabled_p ())
8594 dump_printf_loc (MSG_NOTE, vect_location,
8595 "transform load. ncopies = %d\n", ncopies);
8597 /* Transform. */
8599 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8600 ensure_base_align (dr_info);
8602 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8604 vect_build_gather_load_calls (vinfo,
8605 stmt_info, gsi, vec_stmt, &gs_info, mask);
8606 return true;
8609 if (memory_access_type == VMAT_INVARIANT)
8611 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8612 /* If we have versioned for aliasing or the loop doesn't
8613 have any data dependencies that would preclude this,
8614 then we are sure this is a loop invariant load and
8615 thus we can insert it on the preheader edge. */
8616 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8617 && !nested_in_vect_loop
8618 && hoist_defs_of_uses (stmt_info, loop));
8619 if (hoist_p)
8621 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8622 if (dump_enabled_p ())
8623 dump_printf_loc (MSG_NOTE, vect_location,
8624 "hoisting out of the vectorized loop: %G", stmt);
8625 scalar_dest = copy_ssa_name (scalar_dest);
8626 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8627 gsi_insert_on_edge_immediate
8628 (loop_preheader_edge (loop),
8629 gimple_build_assign (scalar_dest, rhs));
8631 /* These copies are all equivalent, but currently the representation
8632 requires a separate STMT_VINFO_VEC_STMT for each one. */
8633 gimple_stmt_iterator gsi2 = *gsi;
8634 gsi_next (&gsi2);
8635 for (j = 0; j < ncopies; j++)
8637 if (hoist_p)
8638 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8639 vectype, NULL);
8640 else
8641 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8642 vectype, &gsi2);
8643 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8644 if (slp)
8645 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8646 else
8648 if (j == 0)
8649 *vec_stmt = new_stmt;
8650 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8653 return true;
8656 if (memory_access_type == VMAT_ELEMENTWISE
8657 || memory_access_type == VMAT_STRIDED_SLP)
8659 gimple_stmt_iterator incr_gsi;
8660 bool insert_after;
8661 tree offvar;
8662 tree ivstep;
8663 tree running_off;
8664 vec<constructor_elt, va_gc> *v = NULL;
8665 tree stride_base, stride_step, alias_off;
8666 /* Checked by get_load_store_type. */
8667 unsigned int const_nunits = nunits.to_constant ();
8668 unsigned HOST_WIDE_INT cst_offset = 0;
8669 tree dr_offset;
8671 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8672 gcc_assert (!nested_in_vect_loop);
8674 if (grouped_load)
8676 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8677 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8679 else
8681 first_stmt_info = stmt_info;
8682 first_dr_info = dr_info;
8684 if (slp && grouped_load)
8686 group_size = DR_GROUP_SIZE (first_stmt_info);
8687 ref_type = get_group_alias_ptr_type (first_stmt_info);
8689 else
8691 if (grouped_load)
8692 cst_offset
8693 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8694 * vect_get_place_in_interleaving_chain (stmt_info,
8695 first_stmt_info));
8696 group_size = 1;
8697 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8700 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8701 stride_base
8702 = fold_build_pointer_plus
8703 (DR_BASE_ADDRESS (first_dr_info->dr),
8704 size_binop (PLUS_EXPR,
8705 convert_to_ptrofftype (dr_offset),
8706 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8707 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8709 /* For a load with loop-invariant (but other than power-of-2)
8710 stride (i.e. not a grouped access) like so:
8712 for (i = 0; i < n; i += stride)
8713 ... = array[i];
8715 we generate a new induction variable and new accesses to
8716 form a new vector (or vectors, depending on ncopies):
8718 for (j = 0; ; j += VF*stride)
8719 tmp1 = array[j];
8720 tmp2 = array[j + stride];
8722 vectemp = {tmp1, tmp2, ...}
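	 As a purely illustrative instance (assuming VF == 4, a 4-element
	 vectype and stride == 3, numbers not taken from the sources), each
	 copy loads array[j], array[j + 3], array[j + 6] and array[j + 9] and
	 assembles them into one vector, with j advancing by 12.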
8725 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8726 build_int_cst (TREE_TYPE (stride_step), vf));
8728 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8730 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8731 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8732 create_iv (stride_base, ivstep, NULL,
8733 loop, &incr_gsi, insert_after,
8734 &offvar, NULL);
8736 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8738 running_off = offvar;
8739 alias_off = build_int_cst (ref_type, 0);
8740 int nloads = const_nunits;
8741 int lnel = 1;
8742 tree ltype = TREE_TYPE (vectype);
8743 tree lvectype = vectype;
8744 auto_vec<tree> dr_chain;
8745 if (memory_access_type == VMAT_STRIDED_SLP)
8747 if (group_size < const_nunits)
8749 /* First check if vec_init optab supports construction from vector
8750 elts directly. Otherwise avoid emitting a constructor of
8751 vector elements by performing the loads using an integer type
8752 of the same size, constructing a vector of those and then
8753 re-interpreting it as the original vector type. This avoids a
8754 huge runtime penalty due to the general inability to perform
8755 store forwarding from smaller stores to a larger load. */
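/* Hypothetical example: with a 4-element vector of SImode elements and
   GROUP_SIZE == 2, the code below performs two 64-bit integer loads, builds
   a 2-element integer vector from them and VIEW_CONVERTs the result back to
   the original vectype.  */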
8756 tree ptype;
8757 tree vtype
8758 = vector_vector_composition_type (vectype,
8759 const_nunits / group_size,
8760 &ptype);
8761 if (vtype != NULL_TREE)
8763 nloads = const_nunits / group_size;
8764 lnel = group_size;
8765 lvectype = vtype;
8766 ltype = ptype;
8769 else
8771 nloads = 1;
8772 lnel = const_nunits;
8773 ltype = vectype;
8775 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8777 /* Load vector(1) scalar_type if the vectype is a one-element vector. */
8778 else if (nloads == 1)
8779 ltype = vectype;
8781 if (slp)
8783 /* For SLP permutation support we need to load the whole group,
8784 not only the number of vector stmts the permutation result
8785 fits in. */
8786 if (slp_perm)
8788 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8789 variable VF. */
8790 unsigned int const_vf = vf.to_constant ();
8791 ncopies = CEIL (group_size * const_vf, const_nunits);
8792 dr_chain.create (ncopies);
8794 else
8795 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8797 unsigned int group_el = 0;
8798 unsigned HOST_WIDE_INT
8799 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8800 for (j = 0; j < ncopies; j++)
8802 if (nloads > 1)
8803 vec_alloc (v, nloads);
8804 gimple *new_stmt = NULL;
8805 for (i = 0; i < nloads; i++)
8807 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8808 group_el * elsz + cst_offset);
8809 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8810 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8811 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8812 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8813 if (nloads > 1)
8814 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8815 gimple_assign_lhs (new_stmt));
8817 group_el += lnel;
8818 if (! slp
8819 || group_el == group_size)
8821 tree newoff = copy_ssa_name (running_off);
8822 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8823 running_off, stride_step);
8824 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8826 running_off = newoff;
8827 group_el = 0;
8830 if (nloads > 1)
8832 tree vec_inv = build_constructor (lvectype, v);
8833 new_temp = vect_init_vector (vinfo, stmt_info,
8834 vec_inv, lvectype, gsi);
8835 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8836 if (lvectype != vectype)
8838 new_stmt = gimple_build_assign (make_ssa_name (vectype),
8839 VIEW_CONVERT_EXPR,
8840 build1 (VIEW_CONVERT_EXPR,
8841 vectype, new_temp));
8842 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8846 if (slp)
8848 if (slp_perm)
8849 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
8850 else
8851 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8853 else
8855 if (j == 0)
8856 *vec_stmt = new_stmt;
8857 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8860 if (slp_perm)
8862 unsigned n_perms;
8863 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
8864 false, &n_perms);
8866 return true;
8869 if (memory_access_type == VMAT_GATHER_SCATTER
8870 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8871 grouped_load = false;
8873 if (grouped_load)
8875 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8876 group_size = DR_GROUP_SIZE (first_stmt_info);
8877 /* For SLP vectorization we directly vectorize a subchain
8878 without permutation. */
8879 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8880 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8881 /* For BB vectorization always use the first stmt to base
8882 the data ref pointer on. */
8883 if (bb_vinfo)
8884 first_stmt_info_for_drptr
8885 = vect_find_first_scalar_stmt_in_slp (slp_node);
8887 /* Check if the chain of loads is already vectorized. */
8888 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
8889 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8890 ??? But we can only do so if there is exactly one
8891 as we have no way to get at the rest. Leave the CSE
8892 opportunity alone.
8893 ??? With the group load eventually participating
8894 in multiple different permutations (having multiple
8895 slp nodes which refer to the same group) the CSE
8896 is even wrong code. See PR56270. */
8897 && !slp)
8899 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8900 return true;
8902 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8903 group_gap_adj = 0;
8905 /* VEC_NUM is the number of vect stmts to be created for this group. */
8906 if (slp)
8908 grouped_load = false;
8909 /* If an SLP permutation is from N elements to N elements,
8910 and if one vector holds a whole number of N, we can load
8911 the inputs to the permutation in the same way as an
8912 unpermuted sequence. In other cases we need to load the
8913 whole group, not only the number of vector stmts the
8914 permutation result fits in. */
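 /* For instance (illustrative), an even/odd permutation of a group of
    two ints with V4SI vectors satisfies both conditions and can reuse
    the unpermuted loads, whereas permuting a group of three ints with
    V4SI vectors cannot (four is not a multiple of three), so the whole
    group has to be loaded.  */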
8915 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
8916 if (slp_perm
8917 && (group_size != scalar_lanes
8918 || !multiple_p (nunits, group_size)))
8920 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8921 variable VF; see vect_transform_slp_perm_load. */
8922 unsigned int const_vf = vf.to_constant ();
8923 unsigned int const_nunits = nunits.to_constant ();
8924 vec_num = CEIL (group_size * const_vf, const_nunits);
8925 group_gap_adj = vf * group_size - nunits * vec_num;
8927 else
8929 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8930 group_gap_adj
8931 = group_size - scalar_lanes;
8934 else
8935 vec_num = group_size;
8937 ref_type = get_group_alias_ptr_type (first_stmt_info);
8939 else
8941 first_stmt_info = stmt_info;
8942 first_dr_info = dr_info;
8943 group_size = vec_num = 1;
8944 group_gap_adj = 0;
8945 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8948 gcc_assert (alignment_support_scheme);
8949 vec_loop_masks *loop_masks
8950 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8951 ? &LOOP_VINFO_MASKS (loop_vinfo)
8952 : NULL);
8953 vec_loop_lens *loop_lens
8954 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8955 ? &LOOP_VINFO_LENS (loop_vinfo)
8956 : NULL);
8958 /* Shouldn't go with length-based approach if fully masked. */
8959 gcc_assert (!loop_lens || !loop_masks);
8961 /* Targets with store-lane instructions must not require explicit
8962 realignment. vect_supportable_dr_alignment always returns either
8963 dr_aligned or dr_unaligned_supported for masked operations. */
8964 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8965 && !mask
8966 && !loop_masks)
8967 || alignment_support_scheme == dr_aligned
8968 || alignment_support_scheme == dr_unaligned_supported);
8970 /* In case the vectorization factor (VF) is bigger than the number
8971 of elements that we can fit in a vectype (nunits), we have to generate
8972 more than one vector stmt - i.e - we need to "unroll" the
8973 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8974 from one copy of the vector stmt to the next, in the field
8975 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8976 stages to find the correct vector defs to be used when vectorizing
8977 stmts that use the defs of the current stmt. The example below
8978 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8979 need to create 4 vectorized stmts):
8981 before vectorization:
8982 RELATED_STMT VEC_STMT
8983 S1: x = memref - -
8984 S2: z = x + 1 - -
8986 step 1: vectorize stmt S1:
8987 We first create the vector stmt VS1_0, and, as usual, record a
8988 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8989 Next, we create the vector stmt VS1_1, and record a pointer to
8990 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8991 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8992 stmts and pointers:
8993 RELATED_STMT VEC_STMT
8994 VS1_0: vx0 = memref0 VS1_1 -
8995 VS1_1: vx1 = memref1 VS1_2 -
8996 VS1_2: vx2 = memref2 VS1_3 -
8997 VS1_3: vx3 = memref3 - -
8998 S1: x = load - VS1_0
8999 S2: z = x + 1 - -
9002 /* In case of interleaving (non-unit grouped access):
9004 S1: x2 = &base + 2
9005 S2: x0 = &base
9006 S3: x1 = &base + 1
9007 S4: x3 = &base + 3
9009 Vectorized loads are created in the order of memory accesses
9010 starting from the access of the first stmt of the chain:
9012 VS1: vx0 = &base
9013 VS2: vx1 = &base + vec_size*1
9014 VS3: vx3 = &base + vec_size*2
9015 VS4: vx4 = &base + vec_size*3
9017 Then permutation statements are generated:
9019 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9020 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9023 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9024 (the order of the data-refs in the output of vect_permute_load_chain
9025 corresponds to the order of scalar stmts in the interleaving chain - see
9026 the documentation of vect_permute_load_chain()).
9027 The generation of permutation stmts and recording them in
9028 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9030 In case of both multiple types and interleaving, the vector loads and
9031 permutation stmts above are created for every copy. The result vector
9032 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9033 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
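 /* A scalar fragment that gives rise to such an interleaved (grouped)
    load would be, roughly:

	for (i = 0; i < n; i++)
	  a[i] = b[2*i] + b[2*i+1];

    where the two loads from B form a group of size two that is loaded
    with full vectors and then split into even and odd elements by
    VEC_PERM_EXPRs like VS5 and VS6 above.  */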
9035 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9036 on a target that supports unaligned accesses (dr_unaligned_supported)
9037 we generate the following code:
9038 p = initial_addr;
9039 indx = 0;
9040 loop {
9041 p = p + indx * vectype_size;
9042 vec_dest = *(p);
9043 indx = indx + 1;
9046 Otherwise, the data reference is potentially unaligned on a target that
9047 does not support unaligned accesses (dr_explicit_realign_optimized) -
9048 then generate the following code, in which the data in each iteration is
9049 obtained by two vector loads, one from the previous iteration, and one
9050 from the current iteration:
9051 p1 = initial_addr;
9052 msq_init = *(floor(p1))
9053 p2 = initial_addr + VS - 1;
9054 realignment_token = call target_builtin;
9055 indx = 0;
9056 loop {
9057 p2 = p2 + indx * vectype_size
9058 lsq = *(floor(p2))
9059 vec_dest = realign_load (msq, lsq, realignment_token)
9060 indx = indx + 1;
9061 msq = lsq;
9062 } */
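 /* As a concrete illustration of the realignment scheme (byte counts
    assume 16-byte vectors and a pointer misaligned by 4 bytes):
    floor(p1) covers bytes [-4, 12) of the desired data and floor(p2)
    covers bytes [12, 28); realign_load then selects the 16 desired
    bytes starting at offset 4, typically via a permute driven by
    REALIGNMENT_TOKEN.  */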
9064 /* If the misalignment remains the same throughout the execution of the
9065 loop, we can create the init_addr and permutation mask at the loop
9066 preheader. Otherwise, it needs to be created inside the loop.
9067 This can only occur when vectorizing memory accesses in the inner-loop
9068 nested within an outer-loop that is being vectorized. */
9070 if (nested_in_vect_loop
9071 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9072 GET_MODE_SIZE (TYPE_MODE (vectype))))
9074 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9075 compute_in_loop = true;
9078 bool diff_first_stmt_info
9079 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9081 if ((alignment_support_scheme == dr_explicit_realign_optimized
9082 || alignment_support_scheme == dr_explicit_realign)
9083 && !compute_in_loop)
9085 /* If we have different first_stmt_info, we can't set up realignment
9086 here, since we can't guarantee first_stmt_info DR has been
9087 initialized yet; use first_stmt_info_for_drptr DR by bumping the
9088 distance from first_stmt_info DR instead, as below. */
9089 if (!diff_first_stmt_info)
9090 msq = vect_setup_realignment (vinfo,
9091 first_stmt_info, gsi, &realignment_token,
9092 alignment_support_scheme, NULL_TREE,
9093 &at_loop);
9094 if (alignment_support_scheme == dr_explicit_realign_optimized)
9096 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9097 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9098 size_one_node);
9099 gcc_assert (!first_stmt_info_for_drptr);
9102 else
9103 at_loop = loop;
9105 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9106 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9108 tree bump;
9109 tree vec_offset = NULL_TREE;
9110 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9112 aggr_type = NULL_TREE;
9113 bump = NULL_TREE;
9115 else if (memory_access_type == VMAT_GATHER_SCATTER)
9117 aggr_type = elem_type;
9118 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9119 &bump, &vec_offset);
9121 else
9123 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9124 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9125 else
9126 aggr_type = vectype;
9127 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9128 memory_access_type);
9131 vec<tree> vec_offsets = vNULL;
9132 auto_vec<tree> vec_masks;
9133 if (mask)
9134 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9135 mask, &vec_masks, mask_vectype, NULL_TREE);
9136 tree vec_mask = NULL_TREE;
9137 poly_uint64 group_elt = 0;
9138 for (j = 0; j < ncopies; j++)
9140 /* 1. Create the vector or array pointer update chain. */
9141 if (j == 0)
9143 bool simd_lane_access_p
9144 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9145 if (simd_lane_access_p
9146 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9147 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9148 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9149 && integer_zerop (DR_INIT (first_dr_info->dr))
9150 && alias_sets_conflict_p (get_alias_set (aggr_type),
9151 get_alias_set (TREE_TYPE (ref_type)))
9152 && (alignment_support_scheme == dr_aligned
9153 || alignment_support_scheme == dr_unaligned_supported))
9155 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9156 dataref_offset = build_int_cst (ref_type, 0);
9158 else if (diff_first_stmt_info)
9160 dataref_ptr
9161 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9162 aggr_type, at_loop, offset, &dummy,
9163 gsi, &ptr_incr, simd_lane_access_p,
9164 byte_offset, bump);
9165 /* Adjust the pointer by the difference to first_stmt. */
9166 data_reference_p ptrdr
9167 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9168 tree diff
9169 = fold_convert (sizetype,
9170 size_binop (MINUS_EXPR,
9171 DR_INIT (first_dr_info->dr),
9172 DR_INIT (ptrdr)));
9173 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9174 stmt_info, diff);
9175 if (alignment_support_scheme == dr_explicit_realign)
9177 msq = vect_setup_realignment (vinfo,
9178 first_stmt_info_for_drptr, gsi,
9179 &realignment_token,
9180 alignment_support_scheme,
9181 dataref_ptr, &at_loop);
9182 gcc_assert (!compute_in_loop);
9185 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9187 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9188 &dataref_ptr, &vec_offsets, ncopies);
9189 vec_offset = vec_offsets[0];
9191 else
9192 dataref_ptr
9193 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9194 at_loop,
9195 offset, &dummy, gsi, &ptr_incr,
9196 simd_lane_access_p,
9197 byte_offset, bump);
9198 if (mask)
9199 vec_mask = vec_masks[0];
9201 else
9203 if (dataref_offset)
9204 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9205 bump);
9206 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9207 vec_offset = vec_offsets[j];
9208 else
9209 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9210 stmt_info, bump);
9211 if (mask)
9212 vec_mask = vec_masks[j];
9215 if (grouped_load || slp_perm)
9216 dr_chain.create (vec_num);
9218 gimple *new_stmt = NULL;
9219 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9221 tree vec_array;
9223 vec_array = create_vector_array (vectype, vec_num);
9225 tree final_mask = NULL_TREE;
9226 if (loop_masks)
9227 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9228 vectype, j);
9229 if (vec_mask)
9230 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9231 vec_mask, gsi);
9233 gcall *call;
9234 if (final_mask)
9236 /* Emit:
9237 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9238 VEC_MASK). */
9239 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9240 tree alias_ptr = build_int_cst (ref_type, align);
9241 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9242 dataref_ptr, alias_ptr,
9243 final_mask);
9245 else
9247 /* Emit:
9248 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9249 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9250 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9252 gimple_call_set_lhs (call, vec_array);
9253 gimple_call_set_nothrow (call, true);
9254 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9255 new_stmt = call;
9257 /* Extract each vector into an SSA_NAME. */
9258 for (i = 0; i < vec_num; i++)
9260 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9261 vec_array, i);
9262 dr_chain.quick_push (new_temp);
9265 /* Record the mapping between SSA_NAMEs and statements. */
9266 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9268 /* Record that VEC_ARRAY is now dead. */
9269 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9271 else
9273 for (i = 0; i < vec_num; i++)
9275 tree final_mask = NULL_TREE;
9276 if (loop_masks
9277 && memory_access_type != VMAT_INVARIANT)
9278 final_mask = vect_get_loop_mask (gsi, loop_masks,
9279 vec_num * ncopies,
9280 vectype, vec_num * j + i);
9281 if (vec_mask)
9282 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9283 vec_mask, gsi);
9285 if (i > 0)
9286 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9287 gsi, stmt_info, bump);
9289 /* 2. Create the vector-load in the loop. */
9290 switch (alignment_support_scheme)
9292 case dr_aligned:
9293 case dr_unaligned_supported:
9295 unsigned int misalign;
9296 unsigned HOST_WIDE_INT align;
9298 if (memory_access_type == VMAT_GATHER_SCATTER)
9300 tree zero = build_zero_cst (vectype);
9301 tree scale = size_int (gs_info.scale);
9302 gcall *call;
9303 if (loop_masks)
9304 call = gimple_build_call_internal
9305 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9306 vec_offset, scale, zero, final_mask);
9307 else
9308 call = gimple_build_call_internal
9309 (IFN_GATHER_LOAD, 4, dataref_ptr,
9310 vec_offset, scale, zero);
9311 gimple_call_set_nothrow (call, true);
9312 new_stmt = call;
9313 data_ref = NULL_TREE;
9314 break;
9317 align =
9318 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9319 if (alignment_support_scheme == dr_aligned)
9321 gcc_assert (aligned_access_p (first_dr_info));
9322 misalign = 0;
9324 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9326 align = dr_alignment
9327 (vect_dr_behavior (vinfo, first_dr_info));
9328 misalign = 0;
9330 else
9331 misalign = DR_MISALIGNMENT (first_dr_info);
9332 if (dataref_offset == NULL_TREE
9333 && TREE_CODE (dataref_ptr) == SSA_NAME)
9334 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9335 align, misalign);
9337 if (final_mask)
9339 align = least_bit_hwi (misalign | align);
9340 tree ptr = build_int_cst (ref_type, align);
9341 gcall *call
9342 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9343 dataref_ptr, ptr,
9344 final_mask);
9345 gimple_call_set_nothrow (call, true);
9346 new_stmt = call;
9347 data_ref = NULL_TREE;
9349 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9351 tree final_len
9352 = vect_get_loop_len (loop_vinfo, loop_lens,
9353 vec_num * ncopies,
9354 vec_num * j + i);
9355 align = least_bit_hwi (misalign | align);
9356 tree ptr = build_int_cst (ref_type, align);
9357 gcall *call
9358 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9359 dataref_ptr, ptr,
9360 final_len);
9361 gimple_call_set_nothrow (call, true);
9362 new_stmt = call;
9363 data_ref = NULL_TREE;
9365 /* Need conversion if it's wrapped with VnQI. */
9366 machine_mode vmode = TYPE_MODE (vectype);
9367 opt_machine_mode new_ovmode
9368 = get_len_load_store_mode (vmode, true);
9369 machine_mode new_vmode = new_ovmode.require ();
9370 if (vmode != new_vmode)
9372 tree qi_type = unsigned_intQI_type_node;
9373 tree new_vtype
9374 = build_vector_type_for_mode (qi_type, new_vmode);
9375 tree var = vect_get_new_ssa_name (new_vtype,
9376 vect_simple_var);
9377 gimple_set_lhs (call, var);
9378 vect_finish_stmt_generation (vinfo, stmt_info, call,
9379 gsi);
9380 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9381 new_stmt
9382 = gimple_build_assign (vec_dest,
9383 VIEW_CONVERT_EXPR, op);
9386 else
9388 tree ltype = vectype;
9389 tree new_vtype = NULL_TREE;
9390 unsigned HOST_WIDE_INT gap
9391 = DR_GROUP_GAP (first_stmt_info);
9392 unsigned int vect_align
9393 = vect_known_alignment_in_bytes (first_dr_info);
9394 unsigned int scalar_dr_size
9395 = vect_get_scalar_dr_size (first_dr_info);
9396 /* If there's no peeling for gaps but we have a gap
9397 with SLP loads, then load the lower half of the
9398 vector only. See get_group_load_store_type for
9399 when we apply this optimization. */
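 /* E.g. (illustrative), for a group of four ints with
    DR_GROUP_GAP == 2 and a V4SI vectype only the first two elements
    of the group are accessed; they are loaded as one half-width piece
    and the other half of the vector is zero-filled by the constructor
    below.  */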
9400 if (slp
9401 && loop_vinfo
9402 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9403 && gap != 0
9404 && known_eq (nunits, (group_size - gap) * 2)
9405 && known_eq (nunits, group_size)
9406 && gap >= (vect_align / scalar_dr_size))
9408 tree half_vtype;
9409 new_vtype
9410 = vector_vector_composition_type (vectype, 2,
9411 &half_vtype);
9412 if (new_vtype != NULL_TREE)
9413 ltype = half_vtype;
9415 tree offset
9416 = (dataref_offset ? dataref_offset
9417 : build_int_cst (ref_type, 0));
9418 if (ltype != vectype
9419 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9421 unsigned HOST_WIDE_INT gap_offset
9422 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9423 tree gapcst = build_int_cst (ref_type, gap_offset);
9424 offset = size_binop (PLUS_EXPR, offset, gapcst);
9426 data_ref
9427 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9428 if (alignment_support_scheme == dr_aligned)
9430 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9431 TREE_TYPE (data_ref)
9432 = build_aligned_type (TREE_TYPE (data_ref),
9433 align * BITS_PER_UNIT);
9434 else
9435 TREE_TYPE (data_ref)
9436 = build_aligned_type (TREE_TYPE (data_ref),
9437 TYPE_ALIGN (elem_type));
9438 if (ltype != vectype)
9440 vect_copy_ref_info (data_ref,
9441 DR_REF (first_dr_info->dr));
9442 tree tem = make_ssa_name (ltype);
9443 new_stmt = gimple_build_assign (tem, data_ref);
9444 vect_finish_stmt_generation (vinfo, stmt_info,
9445 new_stmt, gsi);
9446 data_ref = NULL;
9447 vec<constructor_elt, va_gc> *v;
9448 vec_alloc (v, 2);
9449 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9451 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9452 build_zero_cst (ltype));
9453 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9455 else
9457 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9458 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9459 build_zero_cst (ltype));
9461 gcc_assert (new_vtype != NULL_TREE);
9462 if (new_vtype == vectype)
9463 new_stmt = gimple_build_assign (
9464 vec_dest, build_constructor (vectype, v));
9465 else
9467 tree new_vname = make_ssa_name (new_vtype);
9468 new_stmt = gimple_build_assign (
9469 new_vname, build_constructor (new_vtype, v));
9470 vect_finish_stmt_generation (vinfo, stmt_info,
9471 new_stmt, gsi);
9472 new_stmt = gimple_build_assign (
9473 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9474 new_vname));
9478 break;
9480 case dr_explicit_realign:
9482 tree ptr, bump;
9484 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9486 if (compute_in_loop)
9487 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9488 &realignment_token,
9489 dr_explicit_realign,
9490 dataref_ptr, NULL);
9492 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9493 ptr = copy_ssa_name (dataref_ptr);
9494 else
9495 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9496 // For explicit realign the target alignment should be
9497 // known at compile time.
9498 unsigned HOST_WIDE_INT align =
9499 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9500 new_stmt = gimple_build_assign
9501 (ptr, BIT_AND_EXPR, dataref_ptr,
9502 build_int_cst
9503 (TREE_TYPE (dataref_ptr),
9504 -(HOST_WIDE_INT) align));
9505 vect_finish_stmt_generation (vinfo, stmt_info,
9506 new_stmt, gsi);
9507 data_ref
9508 = build2 (MEM_REF, vectype, ptr,
9509 build_int_cst (ref_type, 0));
9510 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9511 vec_dest = vect_create_destination_var (scalar_dest,
9512 vectype);
9513 new_stmt = gimple_build_assign (vec_dest, data_ref);
9514 new_temp = make_ssa_name (vec_dest, new_stmt);
9515 gimple_assign_set_lhs (new_stmt, new_temp);
9516 gimple_move_vops (new_stmt, stmt_info->stmt);
9517 vect_finish_stmt_generation (vinfo, stmt_info,
9518 new_stmt, gsi);
9519 msq = new_temp;
9521 bump = size_binop (MULT_EXPR, vs,
9522 TYPE_SIZE_UNIT (elem_type));
9523 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9524 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9525 stmt_info, bump);
9526 new_stmt = gimple_build_assign
9527 (NULL_TREE, BIT_AND_EXPR, ptr,
9528 build_int_cst
9529 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9530 ptr = copy_ssa_name (ptr, new_stmt);
9531 gimple_assign_set_lhs (new_stmt, ptr);
9532 vect_finish_stmt_generation (vinfo, stmt_info,
9533 new_stmt, gsi);
9534 data_ref
9535 = build2 (MEM_REF, vectype, ptr,
9536 build_int_cst (ref_type, 0));
9537 break;
9539 case dr_explicit_realign_optimized:
9541 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9542 new_temp = copy_ssa_name (dataref_ptr);
9543 else
9544 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9545 // We should only be doing this if we know the target
9546 // alignment at compile time.
9547 unsigned HOST_WIDE_INT align =
9548 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9549 new_stmt = gimple_build_assign
9550 (new_temp, BIT_AND_EXPR, dataref_ptr,
9551 build_int_cst (TREE_TYPE (dataref_ptr),
9552 -(HOST_WIDE_INT) align));
9553 vect_finish_stmt_generation (vinfo, stmt_info,
9554 new_stmt, gsi);
9555 data_ref
9556 = build2 (MEM_REF, vectype, new_temp,
9557 build_int_cst (ref_type, 0));
9558 break;
9560 default:
9561 gcc_unreachable ();
9563 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9564 /* DATA_REF is null if we've already built the statement. */
9565 if (data_ref)
9567 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9568 new_stmt = gimple_build_assign (vec_dest, data_ref);
9570 new_temp = make_ssa_name (vec_dest, new_stmt);
9571 gimple_set_lhs (new_stmt, new_temp);
9572 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9574 /* 3. Handle explicit realignment if necessary/supported.
9575 Create in loop:
9576 vec_dest = realign_load (msq, lsq, realignment_token) */
9577 if (alignment_support_scheme == dr_explicit_realign_optimized
9578 || alignment_support_scheme == dr_explicit_realign)
9580 lsq = gimple_assign_lhs (new_stmt);
9581 if (!realignment_token)
9582 realignment_token = dataref_ptr;
9583 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9584 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9585 msq, lsq, realignment_token);
9586 new_temp = make_ssa_name (vec_dest, new_stmt);
9587 gimple_assign_set_lhs (new_stmt, new_temp);
9588 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9590 if (alignment_support_scheme == dr_explicit_realign_optimized)
9592 gcc_assert (phi);
9593 if (i == vec_num - 1 && j == ncopies - 1)
9594 add_phi_arg (phi, lsq,
9595 loop_latch_edge (containing_loop),
9596 UNKNOWN_LOCATION);
9597 msq = lsq;
9601 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9603 tree perm_mask = perm_mask_for_reverse (vectype);
9604 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9605 perm_mask, stmt_info, gsi);
9606 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9609 /* Collect vector loads and later create their permutation in
9610 vect_transform_grouped_load (). */
9611 if (grouped_load || slp_perm)
9612 dr_chain.quick_push (new_temp);
9614 /* Store vector loads in the corresponding SLP_NODE. */
9615 if (slp && !slp_perm)
9616 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9618 /* With SLP permutation we load the gaps as well; without it
9619 we need to skip the gaps after we manage to fully load
9620 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9621 group_elt += nunits;
9622 if (maybe_ne (group_gap_adj, 0U)
9623 && !slp_perm
9624 && known_eq (group_elt, group_size - group_gap_adj))
9626 poly_wide_int bump_val
9627 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9628 * group_gap_adj);
9629 tree bump = wide_int_to_tree (sizetype, bump_val);
9630 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9631 gsi, stmt_info, bump);
9632 group_elt = 0;
9635 /* Bump the vector pointer to account for a gap or for excess
9636 elements loaded for a permuted SLP load. */
9637 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9639 poly_wide_int bump_val
9640 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9641 * group_gap_adj);
9642 tree bump = wide_int_to_tree (sizetype, bump_val);
9643 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9644 stmt_info, bump);
9648 if (slp && !slp_perm)
9649 continue;
9651 if (slp_perm)
9653 unsigned n_perms;
9654 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9655 gsi, vf, false, &n_perms);
9656 gcc_assert (ok);
9658 else
9660 if (grouped_load)
9662 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9663 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9664 group_size, gsi);
9665 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9667 else
9669 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9672 dr_chain.release ();
9674 if (!slp)
9675 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9677 return true;
9680 /* Function vect_is_simple_cond.
9682 Input:
9683 LOOP - the loop that is being vectorized.
9684 COND - Condition that is checked for simple use.
9686 Output:
9687 *COMP_VECTYPE - the vector type for the comparison.
9688 *DTS - The def types for the arguments of the comparison
9690 Returns whether a COND can be vectorized. Checks whether
9691 condition operands are supportable using vect_is_simple_use. */
9693 static bool
9694 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9695 slp_tree slp_node, tree *comp_vectype,
9696 enum vect_def_type *dts, tree vectype)
9698 tree lhs, rhs;
9699 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9700 slp_tree slp_op;
9702 /* Mask case. */
9703 if (TREE_CODE (cond) == SSA_NAME
9704 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9706 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9707 &slp_op, &dts[0], comp_vectype)
9708 || !*comp_vectype
9709 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9710 return false;
9711 return true;
9714 if (!COMPARISON_CLASS_P (cond))
9715 return false;
9717 lhs = TREE_OPERAND (cond, 0);
9718 rhs = TREE_OPERAND (cond, 1);
9720 if (TREE_CODE (lhs) == SSA_NAME)
9722 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9723 &lhs, &slp_op, &dts[0], &vectype1))
9724 return false;
9726 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9727 || TREE_CODE (lhs) == FIXED_CST)
9728 dts[0] = vect_constant_def;
9729 else
9730 return false;
9732 if (TREE_CODE (rhs) == SSA_NAME)
9734 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9735 &rhs, &slp_op, &dts[1], &vectype2))
9736 return false;
9738 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9739 || TREE_CODE (rhs) == FIXED_CST)
9740 dts[1] = vect_constant_def;
9741 else
9742 return false;
9744 if (vectype1 && vectype2
9745 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9746 TYPE_VECTOR_SUBPARTS (vectype2)))
9747 return false;
9749 *comp_vectype = vectype1 ? vectype1 : vectype2;
9750 /* Invariant comparison. */
9751 if (! *comp_vectype)
9753 tree scalar_type = TREE_TYPE (lhs);
9754 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9755 *comp_vectype = truth_type_for (vectype);
9756 else
9758 /* If we can widen the comparison to match vectype do so. */
9759 if (INTEGRAL_TYPE_P (scalar_type)
9760 && !slp_node
9761 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9762 TYPE_SIZE (TREE_TYPE (vectype))))
9763 scalar_type = build_nonstandard_integer_type
9764 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
9765 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9766 slp_node);
9770 return true;
9773 /* vectorizable_condition.
9775 Check if STMT_INFO is a conditional modify expression that can be vectorized.
9776 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9777 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9778 at GSI.
9780 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9782 Return true if STMT_INFO is vectorizable in this way. */
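/* E.g. (illustrative GIMPLE), a scalar statement

     iftmp_1 = a_2 < b_3 ? c_4 : d_5;

   is vectorized into a mask computation followed by

     vect_iftmp = VEC_COND_EXPR <vect_mask, vect_c, vect_d>;

   or, for an EXTRACT_LAST reduction, into an IFN_FOLD_EXTRACT_LAST
   call instead.  */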
9784 static bool
9785 vectorizable_condition (vec_info *vinfo,
9786 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9787 gimple **vec_stmt,
9788 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9790 tree scalar_dest = NULL_TREE;
9791 tree vec_dest = NULL_TREE;
9792 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9793 tree then_clause, else_clause;
9794 tree comp_vectype = NULL_TREE;
9795 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9796 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9797 tree vec_compare;
9798 tree new_temp;
9799 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9800 enum vect_def_type dts[4]
9801 = {vect_unknown_def_type, vect_unknown_def_type,
9802 vect_unknown_def_type, vect_unknown_def_type};
9803 int ndts = 4;
9804 int ncopies;
9805 int vec_num;
9806 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9807 int i;
9808 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9809 vec<tree> vec_oprnds0 = vNULL;
9810 vec<tree> vec_oprnds1 = vNULL;
9811 vec<tree> vec_oprnds2 = vNULL;
9812 vec<tree> vec_oprnds3 = vNULL;
9813 tree vec_cmp_type;
9814 bool masked = false;
9816 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9817 return false;
9819 /* Is vectorizable conditional operation? */
9820 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9821 if (!stmt)
9822 return false;
9824 code = gimple_assign_rhs_code (stmt);
9825 if (code != COND_EXPR)
9826 return false;
9828 stmt_vec_info reduc_info = NULL;
9829 int reduc_index = -1;
9830 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9831 bool for_reduction
9832 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9833 if (for_reduction)
9835 if (STMT_SLP_TYPE (stmt_info))
9836 return false;
9837 reduc_info = info_for_reduction (vinfo, stmt_info);
9838 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9839 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9840 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9841 || reduc_index != -1);
9843 else
9845 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9846 return false;
9848 /* FORNOW: only supported as part of a reduction. */
9849 if (STMT_VINFO_LIVE_P (stmt_info))
9851 if (dump_enabled_p ())
9852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9853 "value used after loop.\n");
9854 return false;
9858 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9859 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9861 if (slp_node)
9863 ncopies = 1;
9864 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9866 else
9868 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9869 vec_num = 1;
9872 gcc_assert (ncopies >= 1);
9873 if (for_reduction && ncopies > 1)
9874 return false; /* FORNOW */
9876 cond_expr = gimple_assign_rhs1 (stmt);
9878 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
9879 &comp_vectype, &dts[0], vectype)
9880 || !comp_vectype)
9881 return false;
9883 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
9884 slp_tree then_slp_node, else_slp_node;
9885 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
9886 &then_clause, &then_slp_node, &dts[2], &vectype1))
9887 return false;
9888 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
9889 &else_clause, &else_slp_node, &dts[3], &vectype2))
9890 return false;
9892 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9893 return false;
9895 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9896 return false;
9898 masked = !COMPARISON_CLASS_P (cond_expr);
9899 vec_cmp_type = truth_type_for (comp_vectype);
9901 if (vec_cmp_type == NULL_TREE)
9902 return false;
9904 cond_code = TREE_CODE (cond_expr);
9905 if (!masked)
9907 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9908 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9911 /* For conditional reductions, the "then" value needs to be the candidate
9912 value calculated by this iteration while the "else" value needs to be
9913 the result carried over from previous iterations. If the COND_EXPR
9914 is the other way around, we need to swap it. */
9915 bool must_invert_cmp_result = false;
9916 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
9918 if (masked)
9919 must_invert_cmp_result = true;
9920 else
9922 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
9923 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
9924 if (new_code == ERROR_MARK)
9925 must_invert_cmp_result = true;
9926 else
9928 cond_code = new_code;
9929 /* Make sure we don't accidentally use the old condition. */
9930 cond_expr = NULL_TREE;
9933 std::swap (then_clause, else_clause);
9936 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
9938 /* Boolean values may have another representation in vectors
9939 and therefore we prefer bit operations over comparison for
9940 them (which also works for scalar masks). We store opcodes
9941 to use in bitop1 and bitop2. Statement is vectorized as
9942 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9943 depending on bitop1 and bitop2 arity. */
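 /* On one-bit booleans the operations chosen below amount to
    (illustrative): a > b -> a & ~b, a >= b -> a | ~b,
    a < b -> b & ~a, a <= b -> b | ~a, a != b -> a ^ b,
    a == b -> ~(a ^ b).  */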
9944 switch (cond_code)
9946 case GT_EXPR:
9947 bitop1 = BIT_NOT_EXPR;
9948 bitop2 = BIT_AND_EXPR;
9949 break;
9950 case GE_EXPR:
9951 bitop1 = BIT_NOT_EXPR;
9952 bitop2 = BIT_IOR_EXPR;
9953 break;
9954 case LT_EXPR:
9955 bitop1 = BIT_NOT_EXPR;
9956 bitop2 = BIT_AND_EXPR;
9957 std::swap (cond_expr0, cond_expr1);
9958 break;
9959 case LE_EXPR:
9960 bitop1 = BIT_NOT_EXPR;
9961 bitop2 = BIT_IOR_EXPR;
9962 std::swap (cond_expr0, cond_expr1);
9963 break;
9964 case NE_EXPR:
9965 bitop1 = BIT_XOR_EXPR;
9966 break;
9967 case EQ_EXPR:
9968 bitop1 = BIT_XOR_EXPR;
9969 bitop2 = BIT_NOT_EXPR;
9970 break;
9971 default:
9972 return false;
9974 cond_code = SSA_NAME;
9977 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
9978 && reduction_type == EXTRACT_LAST_REDUCTION
9979 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
9981 if (dump_enabled_p ())
9982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9983 "reduction comparison operation not supported.\n");
9984 return false;
9987 if (!vec_stmt)
9989 if (bitop1 != NOP_EXPR)
9991 machine_mode mode = TYPE_MODE (comp_vectype);
9992 optab optab;
9994 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
9995 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9996 return false;
9998 if (bitop2 != NOP_EXPR)
10000 optab = optab_for_tree_code (bitop2, comp_vectype,
10001 optab_default);
10002 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10003 return false;
10007 vect_cost_for_stmt kind = vector_stmt;
10008 if (reduction_type == EXTRACT_LAST_REDUCTION)
10009 /* Count one reduction-like operation per vector. */
10010 kind = vec_to_scalar;
10011 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10012 return false;
10014 if (slp_node
10015 && (!vect_maybe_update_slp_op_vectype
10016 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10017 || (op_adjust == 1
10018 && !vect_maybe_update_slp_op_vectype
10019 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10020 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10021 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10023 if (dump_enabled_p ())
10024 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10025 "incompatible vector types for invariants\n");
10026 return false;
10029 if (loop_vinfo && for_reduction
10030 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10032 if (reduction_type == EXTRACT_LAST_REDUCTION)
10033 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10034 ncopies * vec_num, vectype, NULL);
10035 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10036 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10038 if (dump_enabled_p ())
10039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10040 "conditional reduction prevents the use"
10041 " of partial vectors.\n");
10042 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10046 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10047 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10048 cost_vec, kind);
10049 return true;
10052 /* Transform. */
10054 if (!slp_node)
10056 vec_oprnds0.create (1);
10057 vec_oprnds1.create (1);
10058 vec_oprnds2.create (1);
10059 vec_oprnds3.create (1);
10062 /* Handle def. */
10063 scalar_dest = gimple_assign_lhs (stmt);
10064 if (reduction_type != EXTRACT_LAST_REDUCTION)
10065 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10067 bool swap_cond_operands = false;
10069 /* See whether another part of the vectorized code applies a loop
10070 mask to the condition, or to its inverse. */
10072 vec_loop_masks *masks = NULL;
10073 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10075 if (reduction_type == EXTRACT_LAST_REDUCTION)
10076 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10077 else
10079 scalar_cond_masked_key cond (cond_expr, ncopies);
10080 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10081 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10082 else
10084 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10085 cond.code = invert_tree_comparison (cond.code, honor_nans);
10086 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10088 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10089 cond_code = cond.code;
10090 swap_cond_operands = true;
10096 /* Handle cond expr. */
10097 if (masked)
10098 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10099 cond_expr, &vec_oprnds0, comp_vectype,
10100 then_clause, &vec_oprnds2, vectype,
10101 reduction_type != EXTRACT_LAST_REDUCTION
10102 ? else_clause : NULL, &vec_oprnds3, vectype);
10103 else
10104 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10105 cond_expr0, &vec_oprnds0, comp_vectype,
10106 cond_expr1, &vec_oprnds1, comp_vectype,
10107 then_clause, &vec_oprnds2, vectype,
10108 reduction_type != EXTRACT_LAST_REDUCTION
10109 ? else_clause : NULL, &vec_oprnds3, vectype);
10111 /* Arguments are ready. Create the new vector stmt. */
10112 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10114 vec_then_clause = vec_oprnds2[i];
10115 if (reduction_type != EXTRACT_LAST_REDUCTION)
10116 vec_else_clause = vec_oprnds3[i];
10118 if (swap_cond_operands)
10119 std::swap (vec_then_clause, vec_else_clause);
10121 if (masked)
10122 vec_compare = vec_cond_lhs;
10123 else
10125 vec_cond_rhs = vec_oprnds1[i];
10126 if (bitop1 == NOP_EXPR)
10128 gimple_seq stmts = NULL;
10129 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10130 vec_cond_lhs, vec_cond_rhs);
10131 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10133 else
10135 new_temp = make_ssa_name (vec_cmp_type);
10136 gassign *new_stmt;
10137 if (bitop1 == BIT_NOT_EXPR)
10138 new_stmt = gimple_build_assign (new_temp, bitop1,
10139 vec_cond_rhs);
10140 else
10141 new_stmt
10142 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10143 vec_cond_rhs);
10144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10145 if (bitop2 == NOP_EXPR)
10146 vec_compare = new_temp;
10147 else if (bitop2 == BIT_NOT_EXPR)
10149 /* Instead of doing ~x ? y : z do x ? z : y. */
10150 vec_compare = new_temp;
10151 std::swap (vec_then_clause, vec_else_clause);
10153 else
10155 vec_compare = make_ssa_name (vec_cmp_type);
10156 new_stmt
10157 = gimple_build_assign (vec_compare, bitop2,
10158 vec_cond_lhs, new_temp);
10159 vect_finish_stmt_generation (vinfo, stmt_info,
10160 new_stmt, gsi);
10165 /* If we decided to apply a loop mask to the result of the vector
10166 comparison, AND the comparison with the mask now. Later passes
10167 should then be able to reuse the AND results between multiple
10168 vector statements.
10170 For example:
10171 for (int i = 0; i < 100; ++i)
10172 x[i] = y[i] ? z[i] : 10;
10174 results in following optimized GIMPLE:
10176 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10177 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10178 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10179 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10180 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10181 vect_iftmp.11_47, { 10, ... }>;
10183 instead of using masked and unmasked forms of
10184 vec != { 0, ... } (masked in the MASK_LOAD,
10185 unmasked in the VEC_COND_EXPR). */
10187 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10188 in cases where that's necessary. */
10190 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10192 if (!is_gimple_val (vec_compare))
10194 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10195 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10196 vec_compare);
10197 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10198 vec_compare = vec_compare_name;
10201 if (must_invert_cmp_result)
10203 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10204 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10205 BIT_NOT_EXPR,
10206 vec_compare);
10207 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10208 vec_compare = vec_compare_name;
10211 if (masks)
10213 unsigned vec_num = vec_oprnds0.length ();
10214 tree loop_mask
10215 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10216 vectype, i);
10217 tree tmp2 = make_ssa_name (vec_cmp_type);
10218 gassign *g
10219 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10220 loop_mask);
10221 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10222 vec_compare = tmp2;
10226 gimple *new_stmt;
10227 if (reduction_type == EXTRACT_LAST_REDUCTION)
10229 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10230 tree lhs = gimple_get_lhs (old_stmt);
10231 new_stmt = gimple_build_call_internal
10232 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10233 vec_then_clause);
10234 gimple_call_set_lhs (new_stmt, lhs);
10235 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10236 if (old_stmt == gsi_stmt (*gsi))
10237 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10238 else
10240 /* In this case we're moving the definition to later in the
10241 block. That doesn't matter because the only uses of the
10242 lhs are in phi statements. */
10243 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10244 gsi_remove (&old_gsi, true);
10245 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10248 else
10250 new_temp = make_ssa_name (vec_dest);
10251 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10252 vec_then_clause, vec_else_clause);
10253 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10255 if (slp_node)
10256 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10257 else
10258 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10261 if (!slp_node)
10262 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10264 vec_oprnds0.release ();
10265 vec_oprnds1.release ();
10266 vec_oprnds2.release ();
10267 vec_oprnds3.release ();
10269 return true;
10272 /* vectorizable_comparison.
10274 Check if STMT_INFO is a comparison expression that can be vectorized.
10275 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10276 comparison, put it in VEC_STMT, and insert it at GSI.
10278 Return true if STMT_INFO is vectorizable in this way. */
10280 static bool
10281 vectorizable_comparison (vec_info *vinfo,
10282 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10283 gimple **vec_stmt,
10284 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10286 tree lhs, rhs1, rhs2;
10287 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10288 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10289 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10290 tree new_temp;
10291 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10292 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10293 int ndts = 2;
10294 poly_uint64 nunits;
10295 int ncopies;
10296 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10297 int i;
10298 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10299 vec<tree> vec_oprnds0 = vNULL;
10300 vec<tree> vec_oprnds1 = vNULL;
10301 tree mask_type;
10302 tree mask;
10304 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10305 return false;
10307 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10308 return false;
10310 mask_type = vectype;
10311 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10313 if (slp_node)
10314 ncopies = 1;
10315 else
10316 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10318 gcc_assert (ncopies >= 1);
10319 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10320 return false;
10322 if (STMT_VINFO_LIVE_P (stmt_info))
10324 if (dump_enabled_p ())
10325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10326 "value used after loop.\n");
10327 return false;
10330 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10331 if (!stmt)
10332 return false;
10334 code = gimple_assign_rhs_code (stmt);
10336 if (TREE_CODE_CLASS (code) != tcc_comparison)
10337 return false;
10339 slp_tree slp_rhs1, slp_rhs2;
10340 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10341 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10342 return false;
10344 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10345 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10346 return false;
10348 if (vectype1 && vectype2
10349 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10350 TYPE_VECTOR_SUBPARTS (vectype2)))
10351 return false;
10353 vectype = vectype1 ? vectype1 : vectype2;
10355 /* Invariant comparison. */
10356 if (!vectype)
10358 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10359 vectype = mask_type;
10360 else
10361 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10362 slp_node);
10363 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10364 return false;
10366 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10367 return false;
10369 /* Can't compare mask and non-mask types. */
10370 if (vectype1 && vectype2
10371 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10372 return false;
10374 /* Boolean values may have another representation in vectors
10375 and therefore we prefer bit operations over comparison for
10376 them (which also works for scalar masks). We store opcodes
10377 to use in bitop1 and bitop2. Statement is vectorized as
10378 BITOP2 (rhs1 BITOP1 rhs2) or
10379 rhs1 BITOP2 (BITOP1 rhs2)
10380 depending on bitop1 and bitop2 arity. */
10381 bool swap_p = false;
10382 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10384 if (code == GT_EXPR)
10386 bitop1 = BIT_NOT_EXPR;
10387 bitop2 = BIT_AND_EXPR;
10389 else if (code == GE_EXPR)
10391 bitop1 = BIT_NOT_EXPR;
10392 bitop2 = BIT_IOR_EXPR;
10394 else if (code == LT_EXPR)
10396 bitop1 = BIT_NOT_EXPR;
10397 bitop2 = BIT_AND_EXPR;
10398 swap_p = true;
10400 else if (code == LE_EXPR)
10402 bitop1 = BIT_NOT_EXPR;
10403 bitop2 = BIT_IOR_EXPR;
10404 swap_p = true;
10406 else
10408 bitop1 = BIT_XOR_EXPR;
10409 if (code == EQ_EXPR)
10410 bitop2 = BIT_NOT_EXPR;
10414 if (!vec_stmt)
10416 if (bitop1 == NOP_EXPR)
10418 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10419 return false;
10421 else
10423 machine_mode mode = TYPE_MODE (vectype);
10424 optab optab;
10426 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10427 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10428 return false;
10430 if (bitop2 != NOP_EXPR)
10432 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10433 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10434 return false;
10438 /* Put types on constant and invariant SLP children. */
10439 if (slp_node
10440 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10441 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10443 if (dump_enabled_p ())
10444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10445 "incompatible vector types for invariants\n");
10446 return false;
10449 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10450 vect_model_simple_cost (vinfo, stmt_info,
10451 ncopies * (1 + (bitop2 != NOP_EXPR)),
10452 dts, ndts, slp_node, cost_vec);
10453 return true;
10456 /* Transform. */
10457 if (!slp_node)
10459 vec_oprnds0.create (1);
10460 vec_oprnds1.create (1);
10463 /* Handle def. */
10464 lhs = gimple_assign_lhs (stmt);
10465 mask = vect_create_destination_var (lhs, mask_type);
10467 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10468 rhs1, &vec_oprnds0, vectype,
10469 rhs2, &vec_oprnds1, vectype);
10470 if (swap_p)
10471 std::swap (vec_oprnds0, vec_oprnds1);
10473 /* Arguments are ready. Create the new vector stmt. */
10474 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10476 gimple *new_stmt;
10477 vec_rhs2 = vec_oprnds1[i];
10479 new_temp = make_ssa_name (mask);
10480 if (bitop1 == NOP_EXPR)
10482 new_stmt = gimple_build_assign (new_temp, code,
10483 vec_rhs1, vec_rhs2);
10484 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10486 else
10488 if (bitop1 == BIT_NOT_EXPR)
10489 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10490 else
10491 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10492 vec_rhs2);
10493 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10494 if (bitop2 != NOP_EXPR)
10496 tree res = make_ssa_name (mask);
10497 if (bitop2 == BIT_NOT_EXPR)
10498 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10499 else
10500 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10501 new_temp);
10502 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10505 if (slp_node)
10506 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10507 else
10508 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10511 if (!slp_node)
10512 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10514 vec_oprnds0.release ();
10515 vec_oprnds1.release ();
10517 return true;
10520 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10521 can handle all live statements in the node. Otherwise return true
10522 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10523 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10525 static bool
10526 can_vectorize_live_stmts (loop_vec_info loop_vinfo,
10527 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10528 slp_tree slp_node, slp_instance slp_node_instance,
10529 bool vec_stmt_p,
10530 stmt_vector_for_cost *cost_vec)
10532 if (slp_node)
10534 stmt_vec_info slp_stmt_info;
10535 unsigned int i;
10536 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10538 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10539 && !vectorizable_live_operation (loop_vinfo,
10540 slp_stmt_info, gsi, slp_node,
10541 slp_node_instance, i,
10542 vec_stmt_p, cost_vec))
10543 return false;
10546 else if (STMT_VINFO_LIVE_P (stmt_info)
10547 && !vectorizable_live_operation (loop_vinfo, stmt_info, gsi,
10548 slp_node, slp_node_instance, -1,
10549 vec_stmt_p, cost_vec))
10550 return false;
10552 return true;
10555 /* Make sure the statement is vectorizable. */
10557 opt_result
10558 vect_analyze_stmt (vec_info *vinfo,
10559 stmt_vec_info stmt_info, bool *need_to_vectorize,
10560 slp_tree node, slp_instance node_instance,
10561 stmt_vector_for_cost *cost_vec)
10563 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10564 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10565 bool ok;
10566 gimple_seq pattern_def_seq;
10568 if (dump_enabled_p ())
10569 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10570 stmt_info->stmt);
10572 if (gimple_has_volatile_ops (stmt_info->stmt))
10573 return opt_result::failure_at (stmt_info->stmt,
10574 "not vectorized:"
10575 " stmt has volatile operands: %G\n",
10576 stmt_info->stmt);
10578 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10579 && node == NULL
10580 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10582 gimple_stmt_iterator si;
10584 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10586 stmt_vec_info pattern_def_stmt_info
10587 = vinfo->lookup_stmt (gsi_stmt (si));
10588 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10589 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10591 /* Analyze def stmt of STMT if it's a pattern stmt. */
10592 if (dump_enabled_p ())
10593 dump_printf_loc (MSG_NOTE, vect_location,
10594 "==> examining pattern def statement: %G",
10595 pattern_def_stmt_info->stmt);
10597 opt_result res
10598 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10599 need_to_vectorize, node, node_instance,
10600 cost_vec);
10601 if (!res)
10602 return res;
10607 /* Skip stmts that do not need to be vectorized. In loops this is expected
10608 to include:
10609 - the COND_EXPR which is the loop exit condition
10610 - any LABEL_EXPRs in the loop
10611 - computations that are used only for array indexing or loop control.
10612 In basic blocks we only analyze statements that are a part of some SLP
10613 instance, therefore, all the statements are relevant.
10615 A pattern statement needs to be analyzed instead of the original statement
10616 if the original statement is not relevant. Otherwise, we analyze both
10617 statements. In basic blocks we are called from some SLP instance
10618 traversal; don't analyze pattern stmts instead, since the pattern stmts
10619 will already be part of an SLP instance. */
10621 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10622 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10623 && !STMT_VINFO_LIVE_P (stmt_info))
10625 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10626 && pattern_stmt_info
10627 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10628 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10630 /* Analyze PATTERN_STMT instead of the original stmt. */
10631 stmt_info = pattern_stmt_info;
10632 if (dump_enabled_p ())
10633 dump_printf_loc (MSG_NOTE, vect_location,
10634 "==> examining pattern statement: %G",
10635 stmt_info->stmt);
10637 else
10639 if (dump_enabled_p ())
10640 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10642 return opt_result::success ();
10645 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10646 && node == NULL
10647 && pattern_stmt_info
10648 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10649 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10651 /* Analyze PATTERN_STMT too. */
10652 if (dump_enabled_p ())
10653 dump_printf_loc (MSG_NOTE, vect_location,
10654 "==> examining pattern statement: %G",
10655 pattern_stmt_info->stmt);
10657 opt_result res
10658 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10659 node_instance, cost_vec);
10660 if (!res)
10661 return res;
10664 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10666 case vect_internal_def:
10667 break;
10669 case vect_reduction_def:
10670 case vect_nested_cycle:
10671 gcc_assert (!bb_vinfo
10672 && (relevance == vect_used_in_outer
10673 || relevance == vect_used_in_outer_by_reduction
10674 || relevance == vect_used_by_reduction
10675 || relevance == vect_unused_in_scope
10676 || relevance == vect_used_only_live));
10677 break;
10679 case vect_induction_def:
10680 gcc_assert (!bb_vinfo);
10681 break;
10683 case vect_constant_def:
10684 case vect_external_def:
10685 case vect_unknown_def_type:
10686 default:
10687 gcc_unreachable ();
10690 if (STMT_VINFO_RELEVANT_P (stmt_info))
10692 tree type = gimple_expr_type (stmt_info->stmt);
10693 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10694 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10695 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10696 || (call && gimple_call_lhs (call) == NULL_TREE));
10697 *need_to_vectorize = true;
10700 if (PURE_SLP_STMT (stmt_info) && !node)
10702 if (dump_enabled_p ())
10703 dump_printf_loc (MSG_NOTE, vect_location,
10704 "handled only by SLP analysis\n");
10705 return opt_result::success ();
10708 ok = true;
10709 if (!bb_vinfo
10710 && (STMT_VINFO_RELEVANT_P (stmt_info)
10711 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10712 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10713 -mveclibabi= takes preference over library functions with
10714 the simd attribute. */
10715 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10716 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10717 cost_vec)
10718 || vectorizable_conversion (vinfo, stmt_info,
10719 NULL, NULL, node, cost_vec)
10720 || vectorizable_operation (vinfo, stmt_info,
10721 NULL, NULL, node, cost_vec)
10722 || vectorizable_assignment (vinfo, stmt_info,
10723 NULL, NULL, node, cost_vec)
10724 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10725 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10726 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10727 node, node_instance, cost_vec)
10728 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10729 NULL, node, cost_vec)
10730 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10731 || vectorizable_condition (vinfo, stmt_info,
10732 NULL, NULL, node, cost_vec)
10733 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10734 cost_vec)
10735 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10736 stmt_info, NULL, node));
10737 else
10739 if (bb_vinfo)
10740 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10741 || vectorizable_simd_clone_call (vinfo, stmt_info,
10742 NULL, NULL, node, cost_vec)
10743 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10744 cost_vec)
10745 || vectorizable_shift (vinfo, stmt_info,
10746 NULL, NULL, node, cost_vec)
10747 || vectorizable_operation (vinfo, stmt_info,
10748 NULL, NULL, node, cost_vec)
10749 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10750 cost_vec)
10751 || vectorizable_load (vinfo, stmt_info,
10752 NULL, NULL, node, cost_vec)
10753 || vectorizable_store (vinfo, stmt_info,
10754 NULL, NULL, node, cost_vec)
10755 || vectorizable_condition (vinfo, stmt_info,
10756 NULL, NULL, node, cost_vec)
10757 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10758 cost_vec));
10761 if (!ok)
10762 return opt_result::failure_at (stmt_info->stmt,
10763 "not vectorized:"
10764 " relevant stmt not supported: %G",
10765 stmt_info->stmt);
10767 /* Stmts that are (also) "live" (i.e. used outside the loop)
10768 need extra handling, except for vectorizable reductions. */
10769 if (!bb_vinfo
10770 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10771 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10772 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10773 stmt_info, NULL, node, node_instance,
10774 false, cost_vec))
10775 return opt_result::failure_at (stmt_info->stmt,
10776 "not vectorized:"
10777 " live stmt not supported: %G",
10778 stmt_info->stmt);
10780 return opt_result::success ();
10784 /* Function vect_transform_stmt.
10786 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10788 bool
10789 vect_transform_stmt (vec_info *vinfo,
10790 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10791 slp_tree slp_node, slp_instance slp_node_instance)
10793 bool is_store = false;
10794 gimple *vec_stmt = NULL;
10795 bool done;
10797 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10799 switch (STMT_VINFO_TYPE (stmt_info))
10801 case type_demotion_vec_info_type:
10802 case type_promotion_vec_info_type:
10803 case type_conversion_vec_info_type:
10804 done = vectorizable_conversion (vinfo, stmt_info,
10805 gsi, &vec_stmt, slp_node, NULL);
10806 gcc_assert (done);
10807 break;
10809 case induc_vec_info_type:
10810 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
10811 stmt_info, &vec_stmt, slp_node,
10812 NULL);
10813 gcc_assert (done);
10814 break;
10816 case shift_vec_info_type:
10817 done = vectorizable_shift (vinfo, stmt_info,
10818 gsi, &vec_stmt, slp_node, NULL);
10819 gcc_assert (done);
10820 break;
10822 case op_vec_info_type:
10823 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10824 NULL);
10825 gcc_assert (done);
10826 break;
10828 case assignment_vec_info_type:
10829 done = vectorizable_assignment (vinfo, stmt_info,
10830 gsi, &vec_stmt, slp_node, NULL);
10831 gcc_assert (done);
10832 break;
10834 case load_vec_info_type:
10835 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10836 NULL);
10837 gcc_assert (done);
10838 break;
10840 case store_vec_info_type:
10841 done = vectorizable_store (vinfo, stmt_info,
10842 gsi, &vec_stmt, slp_node, NULL);
10843 gcc_assert (done);
10844 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10846 /* In case of interleaving, the whole chain is vectorized when the
10847 last store in the chain is reached. Store stmts before the last
10848 one are skipped, and their vec_stmt_info shouldn't be freed
10849 meanwhile. */
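/* For example (an illustrative note, not part of the original source):
   for an interleaved group of four scalar stores, DR_GROUP_STORE_COUNT
   only reaches DR_GROUP_SIZE when the fourth store of the group is
   transformed, so is_store becomes true (and the whole chain can be
   cleaned up by the caller) only at that point.  */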
10850 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10851 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10852 is_store = true;
10854 else
10855 is_store = true;
10856 break;
10858 case condition_vec_info_type:
10859 done = vectorizable_condition (vinfo, stmt_info,
10860 gsi, &vec_stmt, slp_node, NULL);
10861 gcc_assert (done);
10862 break;
10864 case comparison_vec_info_type:
10865 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
10866 slp_node, NULL);
10867 gcc_assert (done);
10868 break;
10870 case call_vec_info_type:
10871 done = vectorizable_call (vinfo, stmt_info,
10872 gsi, &vec_stmt, slp_node, NULL);
10873 break;
10875 case call_simd_clone_vec_info_type:
10876 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
10877 slp_node, NULL);
10878 break;
10880 case reduc_vec_info_type:
10881 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10882 gsi, &vec_stmt, slp_node);
10883 gcc_assert (done);
10884 break;
10886 case cycle_phi_info_type:
10887 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
10888 &vec_stmt, slp_node, slp_node_instance);
10889 gcc_assert (done);
10890 break;
10892 case lc_phi_info_type:
10893 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10894 stmt_info, &vec_stmt, slp_node);
10895 gcc_assert (done);
10896 break;
10898 default:
10899 if (!STMT_VINFO_LIVE_P (stmt_info))
10901 if (dump_enabled_p ())
10902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10903 "stmt not supported.\n");
10904 gcc_unreachable ();
10906 done = true;
10909 if (!slp_node && vec_stmt)
10910 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
10912 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
10913 return is_store;
10915 /* If this stmt defines a value used on a backedge, update the
10916 vectorized PHIs. */
10917 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
10918 stmt_vec_info reduc_info;
10919 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
10920 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
10921 && (reduc_info = info_for_reduction (vinfo, orig_stmt_info))
10922 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
10923 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
10925 gphi *phi;
10926 edge e;
10927 if (!slp_node
10928 && (phi = dyn_cast <gphi *>
10929 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
10930 && dominated_by_p (CDI_DOMINATORS,
10931 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
10932 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
10933 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
10934 == gimple_get_lhs (orig_stmt_info->stmt)))
10936 vec<gimple *> &phi_info
10937 = STMT_VINFO_VEC_STMTS (STMT_VINFO_REDUC_DEF (orig_stmt_info));
10938 vec<gimple *> &vec_stmt
10939 = STMT_VINFO_VEC_STMTS (stmt_info);
10940 gcc_assert (phi_info.length () == vec_stmt.length ());
10941 for (unsigned i = 0; i < phi_info.length (); ++i)
10942 add_phi_arg (as_a <gphi *> (phi_info[i]),
10943 gimple_get_lhs (vec_stmt[i]), e,
10944 gimple_phi_arg_location (phi, e->dest_idx));
10946 else if (slp_node
10947 && slp_node != slp_node_instance->reduc_phis)
10949 slp_tree phi_node = slp_node_instance->reduc_phis;
10950 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
10951 e = loop_latch_edge (gimple_bb (phi)->loop_father);
10952 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
10953 == SLP_TREE_VEC_STMTS (slp_node).length ());
10954 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
10955 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
10956 vect_get_slp_vect_def (slp_node, i),
10957 e, gimple_phi_arg_location (phi, e->dest_idx));
10961 /* Handle stmts whose DEF is used outside the loop-nest that is
10962 being vectorized. */
10963 if (is_a <loop_vec_info> (vinfo))
10964 done = can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10965 stmt_info, gsi, slp_node,
10966 slp_node_instance, true, NULL);
10967 gcc_assert (done);
10969 return false;
10973 /* Remove a group of stores (for SLP or interleaving), free their
10974 stmt_vec_info. */
10976 void
10977 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
10979 stmt_vec_info next_stmt_info = first_stmt_info;
10981 while (next_stmt_info)
10983 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10984 next_stmt_info = vect_orig_stmt (next_stmt_info);
10985 /* Free the attached stmt_vec_info and remove the stmt. */
10986 vinfo->remove_stmt (next_stmt_info);
10987 next_stmt_info = tmp;
10991 /* If NUNITS is nonzero, return a vector type that contains NUNITS
10992 elements of type SCALAR_TYPE, or null if the target doesn't support
10993 such a type.
10995 If NUNITS is zero, return a vector type that contains elements of
10996 type SCALAR_TYPE, choosing whichever vector size the target prefers.
10998 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
10999 for this vectorization region and want to "autodetect" the best choice.
11000 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11001 and we want the new type to be interoperable with it. PREVAILING_MODE
11002 in this case can be a scalar integer mode or a vector mode; when it
11003 is a vector mode, the function acts like a tree-level version of
11004 related_vector_mode. */
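/* An illustrative usage sketch (not part of the original source; the
   concrete mode and type node are assumptions about the target): a
   caller that has already settled on a 128-bit vector mode such as
   V4SImode and needs a compatible vector of "short int" elements might
   write

     tree vectype
       = get_related_vectype_for_scalar_type (V4SImode,
                                              short_integer_type_node, 0);

   which would typically return an 8-element vector of shorts, since the
   prevailing vector mode fixes the overall vector size.  */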
11006 tree
11007 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11008 tree scalar_type, poly_uint64 nunits)
11010 tree orig_scalar_type = scalar_type;
11011 scalar_mode inner_mode;
11012 machine_mode simd_mode;
11013 tree vectype;
11015 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11016 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11017 return NULL_TREE;
11019 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11021 /* For vector types of elements whose mode precision doesn't
11022 match their type's precision we use an element type of mode
11023 precision. The vectorization routines will have to make sure
11024 they support the proper result truncation/extension.
11025 We also make sure to build vector types with INTEGER_TYPE
11026 component type only. */
11027 if (INTEGRAL_TYPE_P (scalar_type)
11028 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11029 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11030 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11031 TYPE_UNSIGNED (scalar_type));
11033 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11034 When the component mode passes the above test, simply use a type
11035 corresponding to that mode. The theory is that any use that
11036 would cause problems with this will disable vectorization anyway. */
11037 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11038 && !INTEGRAL_TYPE_P (scalar_type))
11039 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11041 /* We can't build a vector type of elements with alignment bigger than
11042 their size. */
11043 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11044 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11045 TYPE_UNSIGNED (scalar_type));
11047 /* If we fell back to using the mode, fail if there was
11048 no scalar type for it. */
11049 if (scalar_type == NULL_TREE)
11050 return NULL_TREE;
11052 /* If no prevailing mode was supplied, use the mode the target prefers.
11053 Otherwise lookup a vector mode based on the prevailing mode. */
11054 if (prevailing_mode == VOIDmode)
11056 gcc_assert (known_eq (nunits, 0U));
11057 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11058 if (SCALAR_INT_MODE_P (simd_mode))
11060 /* Traditional behavior is not to take the integer mode
11061 literally, but simply to use it as a way of determining
11062 the vector size. It is up to mode_for_vector to decide
11063 what the TYPE_MODE should be.
11065 Note that nunits == 1 is allowed in order to support single
11066 element vector types. */
11067 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11068 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11069 return NULL_TREE;
11072 else if (SCALAR_INT_MODE_P (prevailing_mode)
11073 || !related_vector_mode (prevailing_mode,
11074 inner_mode, nunits).exists (&simd_mode))
11076 /* Fall back to using mode_for_vector, mostly in the hope of being
11077 able to use an integer mode. */
11078 if (known_eq (nunits, 0U)
11079 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11080 return NULL_TREE;
11082 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11083 return NULL_TREE;
11086 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11088 /* In cases where the mode was chosen by mode_for_vector, check that
11089 the target actually supports the chosen mode, or that it at least
11090 allows the vector mode to be replaced by a like-sized integer. */
11091 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11092 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11093 return NULL_TREE;
11095 /* Re-attach the address-space qualifier if we canonicalized the scalar
11096 type. */
11097 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11098 return build_qualified_type
11099 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11101 return vectype;
11104 /* Function get_vectype_for_scalar_type.
11106 Returns the vector type corresponding to SCALAR_TYPE as supported
11107 by the target. If GROUP_SIZE is nonzero and we're performing BB
11108 vectorization, make sure that the number of elements in the vector
11109 is no bigger than GROUP_SIZE. */
11111 tree
11112 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11113 unsigned int group_size)
11115 /* For BB vectorization, we should always have a group size once we've
11116 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11117 are tentative requests during things like early data reference
11118 analysis and pattern recognition. */
11119 if (is_a <bb_vec_info> (vinfo))
11120 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11121 else
11122 group_size = 0;
11124 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11125 scalar_type);
11126 if (vectype && vinfo->vector_mode == VOIDmode)
11127 vinfo->vector_mode = TYPE_MODE (vectype);
11129 /* Register the natural choice of vector type, before the group size
11130 has been applied. */
11131 if (vectype)
11132 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11134 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11135 try again with an explicit number of elements. */
11136 if (vectype
11137 && group_size
11138 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11140 /* Start with the biggest number of units that fits within
11141 GROUP_SIZE and halve it until we find a valid vector type.
11142 Usually either the first attempt will succeed or all will
11143 fail (in the latter case because GROUP_SIZE is too small
11144 for the target), but it's possible that a target could have
11145 a hole between supported vector types.
11147 If GROUP_SIZE is not a power of 2, this has the effect of
11148 trying the largest power of 2 that fits within the group,
11149 even though the group is not a multiple of that vector size.
11150 The BB vectorizer will then try to carve up the group into
11151 smaller pieces. */
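/* For example (illustrative, not in the original source): with
   GROUP_SIZE == 6 the loop below first tries nunits == 4 and, if the
   target has no such vector type, falls back to nunits == 2 before
   giving up.  */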
11152 unsigned int nunits = 1 << floor_log2 (group_size);
11155 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11156 scalar_type, nunits);
11157 nunits /= 2;
11159 while (nunits > 1 && !vectype);
11162 return vectype;
11165 /* Return the vector type corresponding to SCALAR_TYPE as supported
11166 by the target. NODE, if nonnull, is the SLP tree node that will
11167 use the returned vector type. */
11169 tree
11170 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11172 unsigned int group_size = 0;
11173 if (node)
11174 group_size = SLP_TREE_LANES (node);
11175 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11178 /* Function get_mask_type_for_scalar_type.
11180 Returns the mask type corresponding to a result of comparison
11181 of vectors of specified SCALAR_TYPE as supported by target.
11182 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11183 make sure that the number of elements in the vector is no bigger
11184 than GROUP_SIZE. */
11186 tree
11187 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11188 unsigned int group_size)
11190 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11192 if (!vectype)
11193 return NULL;
11195 return truth_type_for (vectype);
11198 /* Function get_same_sized_vectype
11200 Returns a vector type corresponding to SCALAR_TYPE of size
11201 VECTOR_TYPE if supported by the target. */
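/* Illustrative example (not from the original source): given a
   4-element vector of floats as VECTOR_TYPE and "int" as SCALAR_TYPE,
   the size computation below yields nunits == 4, and the function then
   requests a 4-element integer vector of the same overall size via
   get_related_vectype_for_scalar_type.  */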
11203 tree
11204 get_same_sized_vectype (tree scalar_type, tree vector_type)
11206 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11207 return truth_type_for (vector_type);
11209 poly_uint64 nunits;
11210 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11211 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11212 return NULL_TREE;
11214 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11215 scalar_type, nunits);
11218 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11219 would not change the chosen vector modes. */
11221 bool
11222 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11224 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11225 i != vinfo->used_vector_modes.end (); ++i)
11226 if (!VECTOR_MODE_P (*i)
11227 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11228 return false;
11229 return true;
11232 /* Function vect_is_simple_use.
11234 Input:
11235 VINFO - the vect info of the loop or basic block that is being vectorized.
11236 OPERAND - operand in the loop or bb.
11237 Output:
11238 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11239 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11240 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11241 the definition could be anywhere in the function
11242 DT - the type of definition
11244 Returns whether a stmt with OPERAND can be vectorized.
11245 For loops, supportable operands are constants, loop invariants, and operands
11246 that are defined by the current iteration of the loop. Unsupportable
11247 operands are those that are defined by a previous iteration of the loop (as
11248 is the case in reduction/induction computations).
11249 For basic blocks, supportable operands are constants and bb invariants.
11250 For now, operands defined outside the basic block are not supported. */
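/* A minimal usage sketch (illustrative; assumes the declarations in
   tree-vectorizer.h that default the trailing output arguments to
   NULL):

     enum vect_def_type dt;
     if (!vect_is_simple_use (gimple_assign_rhs1 (assign), vinfo, &dt))
       return false;

   where ASSIGN is a gassign * and VINFO the enclosing vec_info; on
   success DT tells the caller whether the operand is a constant, an
   external (invariant) value or an internal definition.  */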
11252 bool
11253 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11254 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11256 if (def_stmt_info_out)
11257 *def_stmt_info_out = NULL;
11258 if (def_stmt_out)
11259 *def_stmt_out = NULL;
11260 *dt = vect_unknown_def_type;
11262 if (dump_enabled_p ())
11264 dump_printf_loc (MSG_NOTE, vect_location,
11265 "vect_is_simple_use: operand ");
11266 if (TREE_CODE (operand) == SSA_NAME
11267 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11268 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11269 else
11270 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11273 if (CONSTANT_CLASS_P (operand))
11274 *dt = vect_constant_def;
11275 else if (is_gimple_min_invariant (operand))
11276 *dt = vect_external_def;
11277 else if (TREE_CODE (operand) != SSA_NAME)
11278 *dt = vect_unknown_def_type;
11279 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11280 *dt = vect_external_def;
11281 else
11283 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11284 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11285 if (!stmt_vinfo)
11286 *dt = vect_external_def;
11287 else
11289 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11290 def_stmt = stmt_vinfo->stmt;
11291 switch (gimple_code (def_stmt))
11293 case GIMPLE_PHI:
11294 case GIMPLE_ASSIGN:
11295 case GIMPLE_CALL:
11296 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11297 break;
11298 default:
11299 *dt = vect_unknown_def_type;
11300 break;
11302 if (def_stmt_info_out)
11303 *def_stmt_info_out = stmt_vinfo;
11305 if (def_stmt_out)
11306 *def_stmt_out = def_stmt;
11309 if (dump_enabled_p ())
11311 dump_printf (MSG_NOTE, ", type of def: ");
11312 switch (*dt)
11314 case vect_uninitialized_def:
11315 dump_printf (MSG_NOTE, "uninitialized\n");
11316 break;
11317 case vect_constant_def:
11318 dump_printf (MSG_NOTE, "constant\n");
11319 break;
11320 case vect_external_def:
11321 dump_printf (MSG_NOTE, "external\n");
11322 break;
11323 case vect_internal_def:
11324 dump_printf (MSG_NOTE, "internal\n");
11325 break;
11326 case vect_induction_def:
11327 dump_printf (MSG_NOTE, "induction\n");
11328 break;
11329 case vect_reduction_def:
11330 dump_printf (MSG_NOTE, "reduction\n");
11331 break;
11332 case vect_double_reduction_def:
11333 dump_printf (MSG_NOTE, "double reduction\n");
11334 break;
11335 case vect_nested_cycle:
11336 dump_printf (MSG_NOTE, "nested cycle\n");
11337 break;
11338 case vect_unknown_def_type:
11339 dump_printf (MSG_NOTE, "unknown\n");
11340 break;
11344 if (*dt == vect_unknown_def_type)
11346 if (dump_enabled_p ())
11347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11348 "Unsupported pattern.\n");
11349 return false;
11352 return true;
11355 /* Function vect_is_simple_use.
11357 Same as vect_is_simple_use but also determines the vector operand
11358 type of OPERAND and stores it to *VECTYPE. If the definition of
11359 OPERAND is vect_uninitialized_def, vect_constant_def or
11360 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11361 is responsible for computing the best suited vector type for the
11362 scalar operand. */
11364 bool
11365 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11366 tree *vectype, stmt_vec_info *def_stmt_info_out,
11367 gimple **def_stmt_out)
11369 stmt_vec_info def_stmt_info;
11370 gimple *def_stmt;
11371 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11372 return false;
11374 if (def_stmt_out)
11375 *def_stmt_out = def_stmt;
11376 if (def_stmt_info_out)
11377 *def_stmt_info_out = def_stmt_info;
11379 /* Now get a vector type if the def is internal, otherwise supply
11380 NULL_TREE and leave it up to the caller to figure out a proper
11381 type for the use stmt. */
11382 if (*dt == vect_internal_def
11383 || *dt == vect_induction_def
11384 || *dt == vect_reduction_def
11385 || *dt == vect_double_reduction_def
11386 || *dt == vect_nested_cycle)
11388 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11389 gcc_assert (*vectype != NULL_TREE);
11390 if (dump_enabled_p ())
11391 dump_printf_loc (MSG_NOTE, vect_location,
11392 "vect_is_simple_use: vectype %T\n", *vectype);
11394 else if (*dt == vect_uninitialized_def
11395 || *dt == vect_constant_def
11396 || *dt == vect_external_def)
11397 *vectype = NULL_TREE;
11398 else
11399 gcc_unreachable ();
11401 return true;
11404 /* Function vect_is_simple_use.
11406 Same as vect_is_simple_use but determines the operand by operand
11407 position OPERAND from either STMT or SLP_NODE, filling in *OP
11408 and *SLP_DEF (when SLP_NODE is not NULL). */
11410 bool
11411 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11412 unsigned operand, tree *op, slp_tree *slp_def,
11413 enum vect_def_type *dt,
11414 tree *vectype, stmt_vec_info *def_stmt_info_out)
11416 if (slp_node)
11418 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11419 *slp_def = child;
11420 *vectype = SLP_TREE_VECTYPE (child);
11421 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11423 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11424 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11426 else
11428 if (def_stmt_info_out)
11429 *def_stmt_info_out = NULL;
11430 *op = SLP_TREE_SCALAR_OPS (child)[0];
11431 *dt = SLP_TREE_DEF_TYPE (child);
11432 return true;
11435 else
11437 *slp_def = NULL;
11438 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11440 if (gimple_assign_rhs_code (ass) == COND_EXPR
11441 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11443 if (operand < 2)
11444 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11445 else
11446 *op = gimple_op (ass, operand);
11448 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11449 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11450 else
11451 *op = gimple_op (ass, operand + 1);
11453 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11455 if (gimple_call_internal_p (call)
11456 && internal_store_fn_p (gimple_call_internal_fn (call)))
11457 operand = internal_fn_stored_value_index (gimple_call_internal_fn
11458 (call));
11459 *op = gimple_call_arg (call, operand);
11461 else
11462 gcc_unreachable ();
11463 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11467 /* If OP is not NULL and is external or constant, update its vector
11468 type with VECTYPE. Returns true if successful or false if not,
11469 for example when conflicting vector types are present. */
11471 bool
11472 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11474 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11475 return true;
11476 if (SLP_TREE_VECTYPE (op))
11477 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11478 SLP_TREE_VECTYPE (op) = vectype;
11479 return true;
11482 /* Function supportable_widening_operation
11484 Check whether an operation represented by the code CODE is a
11485 widening operation that is supported by the target platform in
11486 vector form (i.e., when operating on arguments of type VECTYPE_IN
11487 producing a result of type VECTYPE_OUT).
11489 Widening operations we currently support are NOP (CONVERT), FLOAT,
11490 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11491 are supported by the target platform either directly (via vector
11492 tree-codes), or via target builtins.
11494 Output:
11495 - CODE1 and CODE2 are codes of vector operations to be used when
11496 vectorizing the operation, if available.
11497 - MULTI_STEP_CVT determines the number of required intermediate steps in
11498 case of multi-step conversion (like char->short->int - in that case
11499 MULTI_STEP_CVT will be 1).
11500 - INTERM_TYPES contains the intermediate type required to perform the
11501 widening operation (short in the above example). */
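/* Illustrative example (not from the original source): widening a
   vector X of 8 chars to shorts with a plain conversion is emitted as
   the pair

     lo = VEC_UNPACK_LO_EXPR <X>;   -- one half of the lanes, widened
     hi = VEC_UNPACK_HI_EXPR <X>;   -- the other half, widened

   so CODE1/CODE2 come back as VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR and
   two 4-element short vectors hold the widened results (the lo/hi roles
   are swapped on big-endian targets, as done below).  */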
11503 bool
11504 supportable_widening_operation (vec_info *vinfo,
11505 enum tree_code code, stmt_vec_info stmt_info,
11506 tree vectype_out, tree vectype_in,
11507 enum tree_code *code1, enum tree_code *code2,
11508 int *multi_step_cvt,
11509 vec<tree> *interm_types)
11511 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11512 class loop *vect_loop = NULL;
11513 machine_mode vec_mode;
11514 enum insn_code icode1, icode2;
11515 optab optab1, optab2;
11516 tree vectype = vectype_in;
11517 tree wide_vectype = vectype_out;
11518 enum tree_code c1, c2;
11519 int i;
11520 tree prev_type, intermediate_type;
11521 machine_mode intermediate_mode, prev_mode;
11522 optab optab3, optab4;
11524 *multi_step_cvt = 0;
11525 if (loop_info)
11526 vect_loop = LOOP_VINFO_LOOP (loop_info);
11528 switch (code)
11530 case WIDEN_MULT_EXPR:
11531 /* The result of a vectorized widening operation usually requires
11532 two vectors (because the widened results do not fit into one vector).
11533 The generated vector results would normally be expected to be
11534 generated in the same order as in the original scalar computation,
11535 i.e. if 8 results are generated in each vector iteration, they are
11536 to be organized as follows:
11537 vect1: [res1,res2,res3,res4],
11538 vect2: [res5,res6,res7,res8].
11540 However, in the special case that the result of the widening
11541 operation is used in a reduction computation only, the order doesn't
11542 matter (because when vectorizing a reduction we change the order of
11543 the computation). Some targets can take advantage of this and
11544 generate more efficient code. For example, targets like Altivec,
11545 that support widen_mult using a sequence of {mult_even,mult_odd}
11546 generate the following vectors:
11547 vect1: [res1,res3,res5,res7],
11548 vect2: [res2,res4,res6,res8].
11550 When vectorizing outer-loops, we execute the inner-loop sequentially
11551 (each vectorized inner-loop iteration contributes to VF outer-loop
11552 iterations in parallel). We therefore don't allow changing the
11553 order of the computation in the inner-loop during outer-loop
11554 vectorization. */
11555 /* TODO: Another case in which order doesn't *really* matter is when we
11556 widen and then contract again, e.g. (short)((int)x * y >> 8).
11557 Normally, pack_trunc performs an even/odd permute, whereas the
11558 repack from an even/odd expansion would be an interleave, which
11559 would be significantly simpler for e.g. AVX2. */
11560 /* In any case, in order to avoid duplicating the code below, recurse
11561 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11562 are properly set up for the caller. If we fail, we'll continue with
11563 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11564 if (vect_loop
11565 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11566 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11567 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11568 stmt_info, vectype_out,
11569 vectype_in, code1, code2,
11570 multi_step_cvt, interm_types))
11572 /* Elements in a vector with vect_used_by_reduction property cannot
11573 be reordered if the use chain with this property does not have the
11574 same operation. One such example is s += a * b, where elements
11575 in a and b cannot be reordered. Here we check if the vector defined
11576 by STMT is only directly used in the reduction statement. */
11577 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11578 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11579 if (use_stmt_info
11580 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11581 return true;
11583 c1 = VEC_WIDEN_MULT_LO_EXPR;
11584 c2 = VEC_WIDEN_MULT_HI_EXPR;
11585 break;
11587 case DOT_PROD_EXPR:
11588 c1 = DOT_PROD_EXPR;
11589 c2 = DOT_PROD_EXPR;
11590 break;
11592 case SAD_EXPR:
11593 c1 = SAD_EXPR;
11594 c2 = SAD_EXPR;
11595 break;
11597 case VEC_WIDEN_MULT_EVEN_EXPR:
11598 /* Support the recursion induced just above. */
11599 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11600 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11601 break;
11603 case WIDEN_LSHIFT_EXPR:
11604 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11605 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11606 break;
11608 CASE_CONVERT:
11609 c1 = VEC_UNPACK_LO_EXPR;
11610 c2 = VEC_UNPACK_HI_EXPR;
11611 break;
11613 case FLOAT_EXPR:
11614 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11615 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11616 break;
11618 case FIX_TRUNC_EXPR:
11619 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11620 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11621 break;
11623 default:
11624 gcc_unreachable ();
11627 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11628 std::swap (c1, c2);
11630 if (code == FIX_TRUNC_EXPR)
11632 /* The signedness is determined from the output operand. */
11633 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11634 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11636 else if (CONVERT_EXPR_CODE_P (code)
11637 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11638 && VECTOR_BOOLEAN_TYPE_P (vectype)
11639 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11640 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11642 /* If the input and result modes are the same, a different optab
11643 is needed where we pass in the number of units in vectype. */
11644 optab1 = vec_unpacks_sbool_lo_optab;
11645 optab2 = vec_unpacks_sbool_hi_optab;
11647 else
11649 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11650 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11653 if (!optab1 || !optab2)
11654 return false;
11656 vec_mode = TYPE_MODE (vectype);
11657 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11658 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11659 return false;
11661 *code1 = c1;
11662 *code2 = c2;
11664 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11665 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11667 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11668 return true;
11669 /* For scalar masks we may have different boolean
11670 vector types having the same QImode. Thus we
11671 add an additional check for the number of elements. */
11672 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11673 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11674 return true;
11677 /* Check if it's a multi-step conversion that can be done using intermediate
11678 types. */
11680 prev_type = vectype;
11681 prev_mode = vec_mode;
11683 if (!CONVERT_EXPR_CODE_P (code))
11684 return false;
11686 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11687 intermediate steps in the promotion sequence. We try
11688 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11689 not. */
11690 interm_types->create (MAX_INTERM_CVT_STEPS);
11691 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11693 intermediate_mode = insn_data[icode1].operand[0].mode;
11694 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11695 intermediate_type
11696 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11697 else
11698 intermediate_type
11699 = lang_hooks.types.type_for_mode (intermediate_mode,
11700 TYPE_UNSIGNED (prev_type));
11702 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11703 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11704 && intermediate_mode == prev_mode
11705 && SCALAR_INT_MODE_P (prev_mode))
11707 /* If the input and result modes are the same, a different optab
11708 is needed where we pass in the number of units in vectype. */
11709 optab3 = vec_unpacks_sbool_lo_optab;
11710 optab4 = vec_unpacks_sbool_hi_optab;
11712 else
11714 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11715 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11718 if (!optab3 || !optab4
11719 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11720 || insn_data[icode1].operand[0].mode != intermediate_mode
11721 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11722 || insn_data[icode2].operand[0].mode != intermediate_mode
11723 || ((icode1 = optab_handler (optab3, intermediate_mode))
11724 == CODE_FOR_nothing)
11725 || ((icode2 = optab_handler (optab4, intermediate_mode))
11726 == CODE_FOR_nothing))
11727 break;
11729 interm_types->quick_push (intermediate_type);
11730 (*multi_step_cvt)++;
11732 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11733 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11735 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11736 return true;
11737 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11738 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11739 return true;
11742 prev_type = intermediate_type;
11743 prev_mode = intermediate_mode;
11746 interm_types->release ();
11747 return false;
11751 /* Function supportable_narrowing_operation
11753 Check whether an operation represented by the code CODE is a
11754 narrowing operation that is supported by the target platform in
11755 vector form (i.e., when operating on arguments of type VECTYPE_IN
11756 and producing a result of type VECTYPE_OUT).
11758 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11759 and FLOAT. This function checks if these operations are supported by
11760 the target platform directly via vector tree-codes.
11762 Output:
11763 - CODE1 is the code of a vector operation to be used when
11764 vectorizing the operation, if available.
11765 - MULTI_STEP_CVT determines the number of required intermediate steps in
11766 case of multi-step conversion (like int->short->char - in that case
11767 MULTI_STEP_CVT will be 1).
11768 - INTERM_TYPES contains the intermediate type required to perform the
11769 narrowing operation (short in the above example). */
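/* Illustrative example (not from the original source): narrowing int
   vectors to shorts with a plain conversion uses VEC_PACK_TRUNC_EXPR,
   which takes two input vectors and yields one vector with twice as
   many, truncated, elements:

     narrowed = VEC_PACK_TRUNC_EXPR <X1, X2>;

   For int -> char the packing therefore happens in two steps through a
   short intermediate type, i.e. MULTI_STEP_CVT == 1 and INTERM_TYPES
   holds that short vector type.  */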
11771 bool
11772 supportable_narrowing_operation (enum tree_code code,
11773 tree vectype_out, tree vectype_in,
11774 enum tree_code *code1, int *multi_step_cvt,
11775 vec<tree> *interm_types)
11777 machine_mode vec_mode;
11778 enum insn_code icode1;
11779 optab optab1, interm_optab;
11780 tree vectype = vectype_in;
11781 tree narrow_vectype = vectype_out;
11782 enum tree_code c1;
11783 tree intermediate_type, prev_type;
11784 machine_mode intermediate_mode, prev_mode;
11785 int i;
11786 bool uns;
11788 *multi_step_cvt = 0;
11789 switch (code)
11791 CASE_CONVERT:
11792 c1 = VEC_PACK_TRUNC_EXPR;
11793 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11794 && VECTOR_BOOLEAN_TYPE_P (vectype)
11795 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11796 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11797 optab1 = vec_pack_sbool_trunc_optab;
11798 else
11799 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11800 break;
11802 case FIX_TRUNC_EXPR:
11803 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11804 /* The signedness is determined from the output operand. */
11805 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11806 break;
11808 case FLOAT_EXPR:
11809 c1 = VEC_PACK_FLOAT_EXPR;
11810 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11811 break;
11813 default:
11814 gcc_unreachable ();
11817 if (!optab1)
11818 return false;
11820 vec_mode = TYPE_MODE (vectype);
11821 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11822 return false;
11824 *code1 = c1;
11826 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11828 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11829 return true;
11830 /* For scalar masks we may have different boolean
11831 vector types having the same QImode. Thus we
11832 add an additional check for the number of elements. */
11833 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11834 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11835 return true;
11838 if (code == FLOAT_EXPR)
11839 return false;
11841 /* Check if it's a multi-step conversion that can be done using intermediate
11842 types. */
11843 prev_mode = vec_mode;
11844 prev_type = vectype;
11845 if (code == FIX_TRUNC_EXPR)
11846 uns = TYPE_UNSIGNED (vectype_out);
11847 else
11848 uns = TYPE_UNSIGNED (vectype);
11850 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11851 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11852 costly than signed. */
11853 if (code == FIX_TRUNC_EXPR && uns)
11855 enum insn_code icode2;
11857 intermediate_type
11858 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11859 interm_optab
11860 = optab_for_tree_code (c1, intermediate_type, optab_default);
11861 if (interm_optab != unknown_optab
11862 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11863 && insn_data[icode1].operand[0].mode
11864 == insn_data[icode2].operand[0].mode)
11866 uns = false;
11867 optab1 = interm_optab;
11868 icode1 = icode2;
11872 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11873 intermediate steps in the narrowing sequence. We try
11874 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11875 interm_types->create (MAX_INTERM_CVT_STEPS);
11876 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11878 intermediate_mode = insn_data[icode1].operand[0].mode;
11879 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11880 intermediate_type
11881 = vect_double_mask_nunits (prev_type, intermediate_mode);
11882 else
11883 intermediate_type
11884 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11885 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11886 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11887 && intermediate_mode == prev_mode
11888 && SCALAR_INT_MODE_P (prev_mode))
11889 interm_optab = vec_pack_sbool_trunc_optab;
11890 else
11891 interm_optab
11892 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11893 optab_default);
11894 if (!interm_optab
11895 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11896 || insn_data[icode1].operand[0].mode != intermediate_mode
11897 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11898 == CODE_FOR_nothing))
11899 break;
11901 interm_types->quick_push (intermediate_type);
11902 (*multi_step_cvt)++;
11904 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11906 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11907 return true;
11908 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11909 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11910 return true;
11913 prev_mode = intermediate_mode;
11914 prev_type = intermediate_type;
11915 optab1 = interm_optab;
11918 interm_types->release ();
11919 return false;
11922 /* Generate and return a statement that sets vector mask MASK such that
11923 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
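/* Worked example (illustrative, not in the original source): for an
   8-lane MASK with START_INDEX == 2 and END_INDEX == 5, the generated
   IFN_WHILE_ULT call computes

     MASK = { 1, 1, 1, 0, 0, 0, 0, 0 }

   i.e. exactly END_INDEX - START_INDEX == 3 leading lanes are
   active.  */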
11925 gcall *
11926 vect_gen_while (tree mask, tree start_index, tree end_index)
11928 tree cmp_type = TREE_TYPE (start_index);
11929 tree mask_type = TREE_TYPE (mask);
11930 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11931 cmp_type, mask_type,
11932 OPTIMIZE_FOR_SPEED));
11933 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11934 start_index, end_index,
11935 build_zero_cst (mask_type));
11936 gimple_call_set_lhs (call, mask);
11937 return call;
11940 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11941 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11943 tree
11944 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11945 tree end_index)
11947 tree tmp = make_ssa_name (mask_type);
11948 gcall *call = vect_gen_while (tmp, start_index, end_index);
11949 gimple_seq_add_stmt (seq, call);
11950 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11953 /* Try to compute the vector types required to vectorize STMT_INFO,
11954 returning true on success and false if vectorization isn't possible.
11955 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11956 make sure that the number of elements in the vectors is no bigger
11957 than GROUP_SIZE.
11959 On success:
11961 - Set *STMT_VECTYPE_OUT to:
11962 - NULL_TREE if the statement doesn't need to be vectorized;
11963 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11965 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11966 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11967 statement does not help to determine the overall number of units. */
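/* Illustrative example (not from the original source): for a widening
   conversion such as

     int_var = (int) char_var;

   *STMT_VECTYPE_OUT is based on the int result (e.g. a 4-element int
   vector on a 128-bit target), while *NUNITS_VECTYPE_OUT follows the
   smallest scalar type involved -- char -- and would be a 16-element
   vector, which is what determines the vectorization factor.  */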
11969 opt_result
11970 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
11971 tree *stmt_vectype_out,
11972 tree *nunits_vectype_out,
11973 unsigned int group_size)
11975 gimple *stmt = stmt_info->stmt;
11977 /* For BB vectorization, we should always have a group size once we've
11978 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11979 are tentative requests during things like early data reference
11980 analysis and pattern recognition. */
11981 if (is_a <bb_vec_info> (vinfo))
11982 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11983 else
11984 group_size = 0;
11986 *stmt_vectype_out = NULL_TREE;
11987 *nunits_vectype_out = NULL_TREE;
11989 if (gimple_get_lhs (stmt) == NULL_TREE
11990 /* MASK_STORE has no lhs, but is ok. */
11991 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11993 if (is_a <gcall *> (stmt))
11995 /* Ignore calls with no lhs. These must be calls to
11996 #pragma omp simd functions, and what vectorization factor
11997 they really need can't be determined until
11998 vectorizable_simd_clone_call. */
11999 if (dump_enabled_p ())
12000 dump_printf_loc (MSG_NOTE, vect_location,
12001 "defer to SIMD clone analysis.\n");
12002 return opt_result::success ();
12005 return opt_result::failure_at (stmt,
12006 "not vectorized: irregular stmt.%G", stmt);
12009 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12010 return opt_result::failure_at (stmt,
12011 "not vectorized: vector stmt in loop:%G",
12012 stmt);
12014 tree vectype;
12015 tree scalar_type = NULL_TREE;
12016 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12018 vectype = STMT_VINFO_VECTYPE (stmt_info);
12019 if (dump_enabled_p ())
12020 dump_printf_loc (MSG_NOTE, vect_location,
12021 "precomputed vectype: %T\n", vectype);
12023 else if (vect_use_mask_type_p (stmt_info))
12025 unsigned int precision = stmt_info->mask_precision;
12026 scalar_type = build_nonstandard_integer_type (precision, 1);
12027 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12028 if (!vectype)
12029 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12030 " data-type %T\n", scalar_type);
12031 if (dump_enabled_p ())
12032 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12034 else
12036 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12037 scalar_type = TREE_TYPE (DR_REF (dr));
12038 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12039 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12040 else
12041 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12043 if (dump_enabled_p ())
12045 if (group_size)
12046 dump_printf_loc (MSG_NOTE, vect_location,
12047 "get vectype for scalar type (group size %d):"
12048 " %T\n", group_size, scalar_type);
12049 else
12050 dump_printf_loc (MSG_NOTE, vect_location,
12051 "get vectype for scalar type: %T\n", scalar_type);
12053 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12054 if (!vectype)
12055 return opt_result::failure_at (stmt,
12056 "not vectorized:"
12057 " unsupported data-type %T\n",
12058 scalar_type);
12060 if (dump_enabled_p ())
12061 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12063 *stmt_vectype_out = vectype;
12065 /* Don't try to compute scalar types if the stmt produces a boolean
12066 vector; use the existing vector type instead. */
12067 tree nunits_vectype = vectype;
12068 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12070 /* The number of units is set according to the smallest scalar
12071 type (or the largest vector size, but we only support one
12072 vector size per vectorization). */
12073 HOST_WIDE_INT dummy;
12074 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12075 if (scalar_type != TREE_TYPE (vectype))
12077 if (dump_enabled_p ())
12078 dump_printf_loc (MSG_NOTE, vect_location,
12079 "get vectype for smallest scalar type: %T\n",
12080 scalar_type);
12081 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12082 group_size);
12083 if (!nunits_vectype)
12084 return opt_result::failure_at
12085 (stmt, "not vectorized: unsupported data-type %T\n",
12086 scalar_type);
12087 if (dump_enabled_p ())
12088 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12089 nunits_vectype);
12093 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12094 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12096 if (dump_enabled_p ())
12098 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12099 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12100 dump_printf (MSG_NOTE, "\n");
12103 *nunits_vectype_out = nunits_vectype;
12104 return opt_result::success ();
12107 /* Generate and return statement sequence that sets vector length LEN that is:
12109 min_of_start_and_end = min (START_INDEX, END_INDEX);
12110 left_len = END_INDEX - min_of_start_and_end;
12111 rhs = min (left_len, LEN_LIMIT);
12112 LEN = rhs;
12114 Note: the cost of the code generated by this function is modeled
12115 by vect_estimate_min_profitable_iters, so changes here may need
12116 corresponding changes there. */
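/* Worked example (illustrative, not in the original source), with a
   vector length limit of 16 elements:

     START_INDEX == 0,  END_INDEX == 100, LEN_LIMIT == 16  =>  LEN == 16
     START_INDEX == 96, END_INDEX == 100, LEN_LIMIT == 16  =>  LEN == 4

   i.e. LEN saturates at LEN_LIMIT for full iterations and shrinks to
   the number of remaining elements for the final, partial one.  */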
12118 gimple_seq
12119 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12121 gimple_seq stmts = NULL;
12122 tree len_type = TREE_TYPE (len);
12123 gcc_assert (TREE_TYPE (start_index) == len_type);
12125 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12126 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12127 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12128 gimple* stmt = gimple_build_assign (len, rhs);
12129 gimple_seq_add_stmt (&stmts, stmt);
12131 return stmts;